def test_getURLWhole(self):
     codeList = [line.strip() for line in open("/Users/jinxuanwu/PycharmProjects/virtual_env/jobids.txt", "r")]
     for code in codeList:
         try:
             print getURL(str(code)[0:2] + "-" + str(code[2:]))
             print "--------------------"
         except ValueError:
             print code
 def test_getURLWhole(self):
     codeList = [
         line.strip() for line in open(
             "/Users/jinxuanwu/PycharmProjects/virtual_env/jobids.txt", "r")
     ]
     for code in codeList:
         try:
             print getURL(str(code)[0:2] + "-" + str(code[2:]))
             print "--------------------"
         except ValueError:
             print code
Ejemplo n.º 3
0
def getModified(cDir, year, month, modifiedInfo):
    """Expand one month's commit list into a flat per-file modification list.

    Reads ``<cDir><year>-<MM>-commitsInfo.json``, fetches each commit's
    detail via getURL, tags every modified file with the author's email and
    date, and writes the combined list to ``...-modifiedFiles.json``.

    NOTE(review): the ``modifiedInfo`` argument is immediately rebound to []
    and never read; it is retained only for caller compatibility.
    """
    prefix = cDir + str(year) + "-" + "%02d" % month
    infoPath = prefix + "-commitsInfo.json"

    with open(infoPath, "r") as f:
        data = json.loads(f.read())

    modifiedInfo = []

    for commit in data:
        commitUrl = commit["url"]
        urlResponse = getURL.getURL(commitUrl)
        commitInfo = json.loads(urlResponse.data)

        author = commitInfo["commit"]["author"]
        user = author["email"]
        date = author["date"]

        modifiedFiles = commitInfo["files"]
        for entry in modifiedFiles:
            # Drop the (potentially huge) diff text before persisting;
            # pop with default avoids the separate membership test.
            entry.pop("patch", None)
            entry["user"] = user
            entry["date"] = date

        modifiedInfo = modifiedInfo + modifiedFiles

    with open(prefix + "-modifiedFiles.json", "w") as f:
        json.dump(modifiedInfo, f)
Ejemplo n.º 4
0
def queryHO(url, data = ""):
	if len(data) > 0:
		urls = url + "?" + data
		path = "./raw/" + url.replace("/", "-") + ".html"
	else:
		path = "./raw/" + url.replace("/", "-") + ".html"
		
	if os.path.exists(path):
		f = open(str(path), 'rt') 
		code = f.read()
		f.close()
	else:
		print data
		url_STRING = "http://www.history.ac.uk/"+url
		#url_STRING = "http://algorythme.net/post.php"
		code = getURL(url_STRING, data, "GET")
		code = code.read()
		
		
		f = open(str(path), 'wt') 
		f.write(code)
		f.close()

	print url + " has been searched"
	
	return code
Ejemplo n.º 5
0
    def test_URL(self):
        """Check getURL output for two Global News article URLs.

        NOTE(review): ``output_urls`` is not defined anywhere in this
        snippet, so this test cannot run as written -- the expected
        fixture must be recovered from the original repository.
        """
        INPUT_URLS = ['https://globalnews.ca/news/7007527/coronavirus-cases-saskatchewan-may-20/',
                      'https://globalnews.ca/news/7007402/coronavirus-1-new-case-people-in-campbellton-symptom-watch/']
        OUTPUT_URLS = [output_urls, output_urls]

        # `input` shadows the builtin; left unchanged in this doc-only pass.
        for input, output in zip(INPUT_URLS, OUTPUT_URLS):
            self.assertEqual(getURL(input), output)
Ejemplo n.º 6
0
def getAllComments(repo):
    """Download every commit comment for *repo* into per-page JSON files.

    Recreates ``public/data/<repo>/comments/`` and writes one
    ``allComments-<page>.json`` per GitHub API result page, following the
    Link header's ``rel="next"`` entries until the pages are exhausted.
    """
    cDir = "public/data/" + repo + "/" + "comments/"
    # Start from a clean directory each run.
    if os.path.exists(cDir):
        shutil.rmtree(cDir)
    os.makedirs(cDir)

    # SECURITY(review): a hard-coded OAuth token is embedded in this URL;
    # it should be revoked and supplied via configuration instead.
    url = 'https://api.github.com/repos/' + repo + \
            "/comments?access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a"

    # Single loop replaces the original's duplicated first-page code:
    # fetch, persist, then look for a next-page link.
    page = 0
    nextLink = url
    while True:
        commentResponse = getURL.getURL(nextLink)
        commentsData = json.loads(commentResponse.data)
        page += 1
        with open(cDir + "allComments-" + str(page) + ".json", "w") as f:
            json.dump(commentsData, f)

        # GitHub paginates via the Link header: <url>; rel="next".
        listLink = re.findall(r'(?<=<).[^<]*(?=>; rel=\"next)',
                              str(commentResponse.headers))
        print("listLink: ", listLink)
        if not listLink:
            break
        nextLink = listLink[0]
        print("nextLink: ", nextLink)
        time.sleep(1)  # stay polite / avoid rate limiting
Ejemplo n.º 7
0
def main():
    """Download every file returned by getURL() into the user-chosen folder."""
    title, file = getURL()

    # Extract a 3-char extension (mp4 / pdf ...) from each file URL.
    # NOTE(review): the class [mpd4f]{3} also matches e.g. "ppp" or "ddd";
    # confirm the intended extension set.
    exention = []
    for item in file:
        exention.append(re.findall(r'[mpd4f]{3}', str(item))[0])

    real_loc = getLocation()
    for i in range(len(file)):
        file_name = real_loc + title[i] + '.' + exention[i]
        try:
            print('正在下载:' + file_name)
            urllib.request.urlretrieve(file[i], file_name)
        except FileNotFoundError as e:
            # BUG FIX: the original concatenated str + exception, which
            # itself raised TypeError inside the handler.
            print('保存位置出错,请联系开发者' + str(e))
Ejemplo n.º 8
0
def main():
    """Open a small Tk window whose text area uses the page's background colour."""
    root = Tk()
    root.geometry("100x100")

    scrollbar = Scrollbar(root)
    scrollbar.pack(side=RIGHT, fill=Y)

    textFrame = Frame(root)
    textFrame.pack(side=BOTTOM)

    url = 'https://www.websitebuilderexpert.com/how-to-choose-color-for-your-website/'
    #url = 'http://www.catallianceteam.org/'
    html = getURL.getURL(url)
    print(getBackground(html))

    # NOTE(review): this first Text widget -- the one wired to the
    # scrollbar -- is immediately shadowed by the rebinding below and is
    # never packed, so the scrollbar controls a widget that is not shown.
    # Looks unintentional; confirm before changing.
    displayText = Text(textFrame, yscrollcommand=scrollbar.set)
    scrollbar.config(command=displayText.yview)
    displayText = Text(root, background=getBackground(html, url))
    displayText.pack()

    root.mainloop()
Ejemplo n.º 9
0
def getCommit(repo, startDate, endDate):
    """Fetch month-by-month commit data for *repo* between two dates.

    startDate / endDate -- strings whose first 10 characters are
    "YYYY-MM-DD".  Delegates the real work to fetchCommit (one call per
    year/month) and to paddingCommit for the trailing months of the final
    year.

    NOTE(review): every month range below is half-open -- e.g.
    range(startMonth, endMonth) never processes endMonth itself.  Confirm
    whether excluding the end month is intended.
    """
    prefix = 'public/data/' + repo + '/'
    print("I am downloadCommits.py.")
    print(prefix)

    # SECURITY(review): hard-coded OAuth token embedded in the URL.
    url = 'https://api.github.com/repos/' + repo + "?access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a"
    '''
    urlRequest = request.Request(url)
    urlResponse = request.urlopen(urlRequest)

    data = urlResponse.read().decode('utf-8')
    data = json.loads(data)
    '''
    data = json.loads(getURL.getURL(url).data)

    # Only the year/month components of the parsed dates are used below.
    startDate = time.strptime(startDate[0:10], "%Y-%m-%d")
    endDate = time.strptime(endDate[0:10], "%Y-%m-%d")

    startYear = startDate.tm_year
    endYear = endDate.tm_year
    startMonth = startDate.tm_mon
    endMonth = endDate.tm_mon

    # Same calendar year: one partial span, then done.
    if (startYear == endYear):
        for month in range(startMonth, endMonth):
            fetchCommit(startYear, month, data, prefix)
        return

    # First (partial) year ...
    for month in range(startMonth, 13):
        fetchCommit(startYear, month, data, prefix)

    # ... full intermediate years ...
    for year in range(startYear + 1, endYear):
        for month in range(1, 13):
            fetchCommit(year, month, data, prefix)

    # ... final year: real data up to endMonth, padding for the rest.
    for month in range(1, endMonth):
        fetchCommit(endYear, month, data, prefix)
    for month in range(endMonth, 13):
        paddingCommit(endYear, month, data, prefix)
Ejemplo n.º 10
0
def test_background():
    """The website-builder colour-guide page should report background #ebedf3."""
    page_url = "https://www.websitebuilderexpert.com/how-to-choose-color-for-your-website/"
    page = getURL.getURL(page_url)
    assert tkinterBackground.getBackground(page) == "#ebedf3"
Ejemplo n.º 11
0
def test_test_background2():
    """The awwwards gallery page should report a plain white background."""
    page = getURL.getURL("https://www.awwwards.com/websites/colorful/")
    result = tkinterBackground.getBackground(page)
    assert result == "white"
userName = "******"
passwd = "8269202"
DBName = "bullhorn"


db = MySQLdb.connect(mySQLUrl, userName, passwd, DBName, charset='utf8', use_unicode=True)
cursor = db.cursor()

codeDescriptionList = [line.strip() for line in open("job_id_titles.txt", "r")]
#file = open("job_id_title_description", "w+")
for each_item in codeDescriptionList:
    gensimIndex = each_item.index(',')
    code = each_item[:gensimIndex].strip()
    jobtitles = each_item[gensimIndex + 1:]
    try:
        url = getURL(code)
        description = getContent(url)
        integerCode = int(code.replace('-', ''))
        #file.write(each_item + ',' + description + '\n')
        sql = """
            INSERT INTO SOC_JOBTITLE
            VALUES ('%d', '%s', '%s');
        """ % (integerCode, jobtitles.replace('\'', ''), description.replace('\'', ''))
        print sql
        cursor.execute(sql)
        db.commit()
    except ValueError:
        print "value err"

cursor.close()
db.close()
Ejemplo n.º 13
0
def test():
    """Print the detected background colour of the cat alliance homepage."""
    target = 'http://www.catallianceteam.org/'
    page = getURL.getURL(target)
    colour = getBackground(page)
    print(colour)
Ejemplo n.º 14
0
def parseURL(url):
    """Download the stylesheet at *url* and return its parsed entries."""
    raw = getURL.getURL(url)
    # Flatten whitespace, then strip comments before parsing.
    flattened = raw.replace("\t", "").replace("\n", "")
    stripped = removeComments(flattened)
    return parseEntries(stripped)
Ejemplo n.º 15
0
# NOTE(review): the expected-value literal below is truncated in this
# scrape -- the original closed the triple-quoted string with the full
# page source.  As captured here the block is not syntactically complete.
def test_getURL():
    assert getURL.getURL("https://rtsfred3.github.io/echoo/").strip() == """<!DOCTYPE html>
Ejemplo n.º 16
0
def fetchCommit(year, month, data, prefix):
    """Download one month of commits and update the per-user aggregates.

    Writes four JSON files under *prefix*: this month's per-user commit
    lists, the simplified commit info, the raw API payload, and the
    cumulative allUsers.json.

    data -- repository metadata dict; only data["commits_url"] is used.
    """
    # Load the running cross-month user aggregate, if present.
    allUsers = {}
    if os.path.exists(prefix + 'allUsers.json'):
        with open(prefix + 'allUsers.json', 'r') as f:
            allUsers = json.loads(f.read())

    thisMonth = str(year) + "-" + "%02d" % month
    start = thisMonth + '-01T00:00:00Z'
    if month == 12:
        end = thisMonth + "-31T23:59:59Z"
    else:
        end = str(year) + "-" + "%02d" % (month + 1) + '-01T00:00:00Z'

    # data["commits_url"] ends with "{/sha}" (6 chars), stripped here.
    # SECURITY(review): hard-coded OAuth token embedded in the URL.
    commitsUrl = data["commits_url"][
        0:
        -6] + "?since=" + start + "&until=" + end + "&access_token=8f6085fc4cf4b501a7ccad1a3aadc3f98f51384a"

    originalCommits = []
    dealdData = []
    commitsResponse = getURL.getURL(commitsUrl)

    # Single pagination loop replaces the original's duplicated
    # first-page / next-page code: process the page, then follow the
    # Link header's rel="next" entry until none remains.
    while True:
        commitsData = json.loads(commitsResponse.data)
        originalCommits = originalCommits + commitsData
        for item in commitsData:
            dealdData.append(getCommitData(item))

        listLink = re.findall(r'(?<=<).[^<]*(?=>; rel=\"next)',
                              str(commitsResponse.headers))
        print("listLink: ", listLink)
        if not listLink:
            break
        nextLink = listLink[0]
        print("nextLink: ", nextLink)
        commitsResponse = getURL.getURL(nextLink)

    # Group the simplified commits by author email -- both for this
    # month alone and cumulatively across months.
    thisMonthUser = {}
    for item in dealdData:
        user = str(item['author']['email'])
        commitInfo = {
            'date': item['author']['date'],
            'message': item['message'],
            'url': item["url"],
        }
        thisMonthUser.setdefault(user, []).append(commitInfo)
        allUsers.setdefault(user, []).append(commitInfo)

    with open(prefix + thisMonth + "-commitsUser.json", 'w') as f:
        json.dump(thisMonthUser, f)

    with open(prefix + thisMonth + "-commitsInfo.json", 'w') as f:
        json.dump(dealdData, f)

    with open(prefix + thisMonth + "-originalCommits.json", 'w') as f:
        json.dump(originalCommits, f)

    with open(prefix + 'allUsers.json', 'w') as f:
        json.dump(allUsers, f)
from getContent import getContent
from getURL import getURL
import MySQLdb


codeDescriptionList = [line.strip() for line in open("job_id_titles.txt", "r")]
file = open("job_id_title_description", "w+")
for each_item in codeDescriptionList:
    gensimIndex = each_item.index(',')
    code = each_item[:gensimIndex].strip()
    jobtitles = each_item[gensimIndex + 1:]
    try:
        url = getURL(code)
        description = getContent(url)
        file.write(code + "|" + jobtitles + "|" + description + '\n')
    except ValueError:
        print "value err"


file.close()
Ejemplo n.º 18
0
from download_compare import download_compare
import commands

# to be set: directory where the browser saves downloads
download_directory = "~/Download"

# NOTE(review): Options / webdriver / getURL are imported outside this
# excerpt (selenium et al.); the script also continues past this point.
options = Options()
# Disable headless mode (uncomment to suppress the browser window).
# options.set_headless(True)

word = '電通 過労死'

# make directory for this search's data
# NOTE(review): `commands` is Python-2-only, and `word` is interpolated
# unquoted into a shell command -- a space in `word` creates two
# directories; shell metacharacters would be injectable.
commands.getoutput('mkdir ./data/' + word)

url = getURL(word)

print url

splitword = word.split(' ')

# Target URL examples for the suggest-keyword-network endpoint.
# url = 'https://app.mieru-ca.com/faber-extract/suggest-keyword-network?keyword=%E9%9B%BB%E9%80%9A&input=google_JP&action=view'
# url = 'https://app.mieru-ca.com/faber-extract/suggest-keyword-network?keyword=%E5%B0%B1%E6%B4%BB%E3%80%80%E8%85%95%E6%99%82%E8%A8%88&input=google_JP&action=view'
loginurl = 'https://app.mieru-ca.com/faber-extract/'

driver = webdriver.Chrome(chrome_options=options)
driver.get(loginurl)

# Enter login ID/password (continues below this excerpt).
# NOTE(review): `id` shadows the builtin.
id = driver.find_element_by_id("txt_email_login")