def test_getURLWhole(self):
    codeList = [line.strip() for line in open(
        "/Users/jinxuanwu/PycharmProjects/virtual_env/jobids.txt", "r")]
    for code in codeList:
        try:
            print getURL(str(code)[0:2] + "-" + str(code[2:]))
            print "--------------------"
        except ValueError:
            print code
def getModified(cDir, year, month, modifiedInfo):
    prefix = cDir + str(year) + "-" + "%02d" % month
    file = prefix + "-commitsInfo.json"
    with open(file, "r") as f:
        data = f.read()
    data = json.loads(data)
    modifiedInfo = []  # note: the incoming modifiedInfo argument is rebuilt from scratch
    for commit in data:
        commitUrl = commit["url"]
        urlResponse = getURL.getURL(commitUrl)
        commitInfo = json.loads(urlResponse.data)
        user = commitInfo["commit"]["author"]["email"]
        date = commitInfo["commit"]["author"]["date"]
        modifiedFiles = commitInfo["files"]
        for i in range(len(modifiedFiles)):
            # Drop the (potentially large) diff patch; keep who changed what and when.
            if "patch" in modifiedFiles[i]:
                modifiedFiles[i].pop("patch")
            modifiedFiles[i]["user"] = user
            modifiedFiles[i]["date"] = date
        modifiedInfo = modifiedInfo + modifiedFiles
    with open(prefix + "-modifiedFiles.json", "w") as f:
        json.dump(modifiedInfo, f)
def queryHO(url, data=""):
    # Responses are cached on disk, keyed by the URL alone (the query data is
    # passed to getURL separately and does not affect the cache path).
    path = "./raw/" + url.replace("/", "-") + ".html"
    if os.path.exists(path):
        f = open(str(path), 'rt')
        code = f.read()
        f.close()
    else:
        print data
        url_STRING = "http://www.history.ac.uk/" + url
        code = getURL(url_STRING, data, "GET")
        code = code.read()
        f = open(str(path), 'wt')
        f.write(code)
        f.close()
    print url + " has been searched"
    return code
def test_URL(self):
    INPUT_URLS = [
        'https://globalnews.ca/news/7007527/coronavirus-cases-saskatchewan-may-20/',
        'https://globalnews.ca/news/7007402/coronavirus-1-new-case-people-in-campbellton-symptom-watch/',
    ]
    # output_urls is the expected fixture, defined elsewhere in the test module.
    OUTPUT_URLS = [output_urls, output_urls]
    for input, output in zip(INPUT_URLS, OUTPUT_URLS):
        self.assertEqual(getURL(input), output)
def getAllComments(repo):
    cDir = "public/data/" + repo + "/" + "comments/"
    if os.path.exists(cDir):
        shutil.rmtree(cDir)
    os.makedirs(cDir)
    url = ('https://api.github.com/repos/' + repo +
           "/comments?access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a")
    commentResponse = getURL.getURL(url)
    commentsData = json.loads(commentResponse.data)
    page = 1
    with open(cDir + "allComments-" + str(page) + ".json", "w") as f:
        json.dump(commentsData, f)
    headData = str(commentResponse.headers)
    while True:
        # Follow GitHub's pagination via the Link response header.
        listLink = re.findall(r'(?<=<).[^<]*(?=>; rel="next)', headData)
        print("listLink: ", listLink)
        if listLink:
            nextLink = listLink[0]
            print("nextLink: ", nextLink)
            time.sleep(1)
            page += 1
            commentResponse = getURL.getURL(nextLink)
            commentsData = json.loads(commentResponse.data)
            with open(cDir + "allComments-" + str(page) + ".json", "w") as f:
                json.dump(commentsData, f)
            headData = str(commentResponse.headers)
        else:
            break
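# The while/re.findall pagination pattern above (and in fetchCommit below) can be
# factored into a single helper. This is a minimal sketch, assuming getURL.getURL
# returns a response with .data and .headers as used above; getPaginated is a
# hypothetical name, not part of the original code.
def getPaginated(url):
    pages = []
    while url:
        response = getURL.getURL(url)
        pages.append(json.loads(response.data))
        # GitHub advertises the next page in the Link response header.
        nextLinks = re.findall(r'(?<=<).[^<]*(?=>; rel="next)', str(response.headers))
        url = nextLinks[0] if nextLinks else None
    return pages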
def main():
    title, file = getURL()
    extension = []
    for i in range(len(file)):
        a = re.findall(r'[mpd4f]{3}', str(file[i]))[0]
        extension.append(a)
    real_loc = getLocation()
    for i in range(len(file)):
        file_name = real_loc + title[i] + '.' + extension[i]
        try:
            print('Downloading: ' + file_name)
            urllib.request.urlretrieve(file[i], file_name)
        except FileNotFoundError as e:
            print('Bad save location, please contact the developer: ' + str(e))
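# The character class above, r'[mpd4f]{3}', happens to match "mp4" and "pdf" but
# also any other three-letter run drawn from those characters (e.g. "ddd"). A
# stricter alternative, assuming only those two extensions occur in the URLs:
#     a = re.findall(r'\.(mp4|pdf)', str(file[i]))[0]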
def main():
    root = Tk()
    root.geometry("100x100")
    scrollbar = Scrollbar(root)
    scrollbar.pack(side=RIGHT, fill=Y)
    textFrame = Frame(root)
    textFrame.pack(side=BOTTOM)
    url = 'https://www.websitebuilderexpert.com/how-to-choose-color-for-your-website/'
    #url = 'http://www.catallianceteam.org/'
    html = getURL.getURL(url)
    print(getBackground(html))
    displayText = Text(textFrame, yscrollcommand=scrollbar.set)
    scrollbar.config(command=displayText.yview)
    # Note: this second Text widget replaces the scrolled one above.
    displayText = Text(root, background=getBackground(html, url))
    displayText.pack()
    root.mainloop()
def getCommit(repo, startDate, endDate):
    prefix = 'public/data/' + repo + '/'
    print("I am downloadCommits.py.")
    print(prefix)
    url = ('https://api.github.com/repos/' + repo +
           "?access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a")
    data = json.loads(getURL.getURL(url).data)
    startDate = time.strptime(startDate[0:10], "%Y-%m-%d")
    endDate = time.strptime(endDate[0:10], "%Y-%m-%d")
    startYear = startDate.tm_year
    endYear = endDate.tm_year
    startMonth = startDate.tm_mon
    endMonth = endDate.tm_mon
    if startYear == endYear:
        for month in range(startMonth, endMonth):
            fetchCommit(startYear, month, data, prefix)
        return
    for month in range(startMonth, 13):
        fetchCommit(startYear, month, data, prefix)
    for year in range(startYear + 1, endYear):
        for month in range(1, 13):
            fetchCommit(year, month, data, prefix)
    for month in range(1, endMonth):
        fetchCommit(endYear, month, data, prefix)
    # Months from endMonth to the end of the final year get placeholder data only.
    for month in range(endMonth, 13):
        paddingCommit(endYear, month, data, prefix)
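# The four range() loops above are easy to get wrong at year boundaries. A sketch
# of a month iterator that could drive a single loop instead; iterMonths is a
# hypothetical helper, not part of the original code. Note the original stops
# before endMonth and hands the remainder of the final year to paddingCommit,
# whereas this iterator is inclusive of the end month.
def iterMonths(startYear, startMonth, endYear, endMonth):
    year, month = startYear, startMonth
    while (year, month) <= (endYear, endMonth):
        yield year, month
        month += 1
        if month > 12:
            year, month = year + 1, 1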
def test_background():
    html = getURL.getURL(
        "https://www.websitebuilderexpert.com/how-to-choose-color-for-your-website/")
    assert tkinterBackground.getBackground(html) == "#ebedf3"
def test_background2():
    html = getURL.getURL("https://www.awwwards.com/websites/colorful/")
    assert tkinterBackground.getBackground(html) == "white"
from getContent import getContent
from getURL import getURL
import MySQLdb

userName = "******"
passwd = "8269202"
DBName = "bullhorn"
# mySQLUrl (the database host) is assumed to be defined elsewhere.
db = MySQLdb.connect(mySQLUrl, userName, passwd, DBName, charset='utf8',
                     use_unicode=True)
cursor = db.cursor()
codeDescriptionList = [line.strip() for line in open("job_id_titles.txt", "r")]
for each_item in codeDescriptionList:
    gensimIndex = each_item.index(',')
    code = each_item[:gensimIndex].strip()
    jobtitles = each_item[gensimIndex + 1:]
    try:
        url = getURL(code)
        description = getContent(url)
        integerCode = int(code.replace('-', ''))
        sql = """ INSERT INTO SOC_JOBTITLE VALUES ('%d', '%s', '%s'); """ % (
            integerCode, jobtitles.replace('\'', ''), description.replace('\'', ''))
        print sql
        cursor.execute(sql)
        db.commit()
    except ValueError:
        print "value err"
cursor.close()
db.close()
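# The %-formatted INSERT above is open to SQL injection, which is why the single
# quotes are stripped by hand. A minimal sketch of the parameterized equivalent
# (MySQLdb escapes the values itself), reusing the same cursor and variables:
cursor.execute("INSERT INTO SOC_JOBTITLE VALUES (%s, %s, %s)",
               (integerCode, jobtitles, description))
db.commit()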
def test():
    #url = 'https://www.websitebuilderexpert.com/how-to-choose-color-for-your-website/'
    url = 'http://www.catallianceteam.org/'
    html = getURL.getURL(url)
    print(getBackground(html))
def parseURL(url):
    css = getURL.getURL(url).replace("\t", "").replace("\n", "")
    css = removeComments(css)
    #return makeDict(css)
    return parseEntries(css)
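# removeComments is referenced above but not shown. A minimal sketch of what it
# presumably does, stripping /* ... */ CSS comments; an assumption, not the
# original implementation.
def removeComments(css):
    return re.sub(r'/\*.*?\*/', '', css, flags=re.S)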
def test_getURL():
    assert getURL.getURL("https://rtsfred3.github.io/echoo/").strip() == """<!DOCTYPE html>
def fetchCommit(year, month, data, prefix):
    allUsers = {}
    if os.path.exists(prefix + 'allUsers.json'):
        with open(prefix + 'allUsers.json', 'r') as f:
            allUsers = json.loads(f.read())
    thisMonth = str(year) + "-" + "%02d" % month
    start = thisMonth + '-01T00:00:00Z'
    if month == 12:
        end = thisMonth + "-31T23:59:59Z"
    else:
        end = str(year) + "-" + "%02d" % (month + 1) + '-01T00:00:00Z'
    # commits_url ends with the "{/sha}" template suffix; strip it before adding
    # the date-window query.
    commitsUrl = (data["commits_url"][0:-6] + "?since=" + start + "&until=" + end +
                  "&access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a")
    thisMonthUser = {}
    commitsResponse = getURL.getURL(commitsUrl)
    commitsData = json.loads(commitsResponse.data)
    dealtData = []
    headData = str(commitsResponse.headers)
    originalCommits = commitsData
    for item in commitsData:
        dealtData.append(getCommitData(item))
    while True:
        # Follow GitHub's pagination via the Link response header.
        listLink = re.findall(r'(?<=<).[^<]*(?=>; rel="next)', headData)
        print("listLink: ", listLink)
        if listLink:
            nextLink = listLink[0]
            print("nextLink: ", nextLink)
            commitsResponse = getURL.getURL(nextLink)
            commitsData = json.loads(commitsResponse.data)
            originalCommits = originalCommits + commitsData
            headData = str(commitsResponse.headers)
            for item in commitsData:
                dealtData.append(getCommitData(item))
        else:
            break
    for item in dealtData:
        user = str(item['author']['email'])
        commitInfo = {
            'date': item['author']['date'],
            'message': item['message'],
            'url': item["url"],
        }
        if user not in thisMonthUser:
            thisMonthUser[user] = []
        if user not in allUsers:
            allUsers[user] = []
        thisMonthUser[user].append(commitInfo)
        allUsers[user].append(commitInfo)
    with open(prefix + thisMonth + "-commitsUser.json", 'w') as f:
        json.dump(thisMonthUser, f)
    with open(prefix + thisMonth + "-commitsInfo.json", 'w') as f:
        json.dump(dealtData, f)
    with open(prefix + thisMonth + "-originalCommits.json", 'w') as f:
        json.dump(originalCommits, f)
    with open(prefix + 'allUsers.json', 'w') as f:
        json.dump(allUsers, f)
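# getCommitData is not shown. Based on the fields consumed above
# (item['author']['email'], item['author']['date'], item['message'], item['url'])
# and the commit layout used in getModified, it presumably flattens a GitHub
# commit object like this; a sketch, not the original implementation.
def getCommitData(item):
    return {
        'author': {
            'email': item['commit']['author']['email'],
            'date': item['commit']['author']['date'],
        },
        'message': item['commit']['message'],
        'url': item['url'],
    }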
from getContent import getContent
from getURL import getURL
import MySQLdb

codeDescriptionList = [line.strip() for line in open("job_id_titles.txt", "r")]
file = open("job_id_title_description", "w+")
for each_item in codeDescriptionList:
    gensimIndex = each_item.index(',')
    code = each_item[:gensimIndex].strip()
    jobtitles = each_item[gensimIndex + 1:]
    try:
        url = getURL(code)
        description = getContent(url)
        file.write(code + "|" + jobtitles + "|" + description + '\n')
    except ValueError:
        print "value err"
file.close()
from download_compare import download_compare
import commands
# Assumed imports for the calls below (not in the original excerpt):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from getURL import getURL

# to be set
download_directory = "~/Download"

options = Options()
# disable launching a visible browser window
# options.set_headless(True)

word = '電通 過労死'  # search query: "Dentsu karoshi (death from overwork)"

# make a directory for this search word
commands.getoutput('mkdir ./data/' + word)

url = getURL(word)
print url
splitword = word.split(' ')

# URL to access
# url = 'https://app.mieru-ca.com/faber-extract/suggest-keyword-network?keyword=%E9%9B%BB%E9%80%9A&input=google_JP&action=view'
# url = 'https://app.mieru-ca.com/faber-extract/suggest-keyword-network?keyword=%E5%B0%B1%E6%B4%BB%E3%80%80%E8%85%95%E6%99%82%E8%A8%88&input=google_JP&action=view'
loginurl = 'https://app.mieru-ca.com/faber-extract/'

driver = webdriver.Chrome(chrome_options=options)
driver.get(loginurl)

# enter the login ID/password
id = driver.find_element_by_id("txt_email_login")