def saveText(soup):
    """Save a gist page's code lines to a local file '<gistnum>_<filename>'.

    The gist number is the last whitespace token of the 'path' div, the
    original filename the first token of the 'code' span.

    NOTE(review): a second saveText defined later in this file shadows this
    definition at import time — confirm which one callers expect.
    """
    gistnum = getunicode(soup.find('div', {'class': 'path'})).split()[-1]
    span = soup.find('span', {'class': 'code'})
    filename = getunicode(span).split()[0]
    filename = '_'.join([gistnum, filename])
    lines = soup.findAll('div', {'class': 'line'})
    # 'with' guarantees the handle is closed even if a write raises
    # (the original leaked it on error); write() replaces the misused
    # writelines()-on-a-single-string.
    with open(filename, 'w') as f:
        for line in lines:
            f.write(getPrintUnicode(line) + '\n')
    return
def getRepo(result):
    """Extract repository metadata from one search-result node.

    Returns a dict with keys 'author', 'url', 'language', 'description'
    and 'detail' (values are whatever getunicode yields for each node).
    """
    # find('', ...): empty tag name matches any element with class 'title'.
    title = result.find('', {'class': 'title'})
    # Look the anchor up once (the original repeated this find for the
    # href and the text) — also drops the five dead '' initializations.
    link = title.find('a', {'href': True})
    repo_detail_dict = {
        'author': getunicode(link),
        'url': link['href'],
        'language': getunicode(result.find('span', {'class': 'language'})),
        'description': getunicode(result.find('div', {'class': 'description'})),
        'detail': getunicode(result.find('div', {'class': 'details'})),
    }
    return repo_detail_dict
def searchgists(soup, gist_dict=None, search_count=0):
    """Collect gist titles and urls from one search-results page.

    Each found gist is printed (full URL) and stored in gist_dict under a
    running integer key starting at search_count + 1. Returns the updated
    (gist_dict, search_count) pair so callers can thread both through
    successive result pages.
    """
    if gist_dict is None:
        # Fresh dict per call. The original used a mutable default ({}),
        # which is shared across calls and silently accumulated results
        # from unrelated searches.
        gist_dict = {}
    gistlist = soup.findAll('div', {'class': 'info'})
    if gistlist:
        for gist in gistlist:
            gistlink = gist.find('a', {'href': True})
            if gistlink:
                title = getunicode(gistlink)
                url = gistlink['href']
                print('https://gist.github.com' + url)
                search_count += 1
                gist_dict[search_count] = {'title': title, 'url': url}
    return (gist_dict, search_count)
def saveText(soup):
    """Save a gist file's code lines under ./<title>/<data-path>.

    The directory name comes from the page <title> (third token from the
    end — assumes the current page-title layout, TODO confirm) and the
    file's relative location from the first div carrying a 'data-path'
    attribute.
    """
    data_path = soup.find('div', {'class': True, 'data-path': True})['data-path']
    title = getunicode(soup.find('title')).split()[-3]
    data_path = os.path.join(os.getcwd(), title, data_path)
    # Strip the final component of data_path, then split the remainder
    # into containing directory and filename.
    path, filename = os.path.split(os.path.split(data_path)[0])
    makeDir(path)
    filepath = os.path.join(path, filename)
    lines = soup.findAll('div', {'class': 'line'})
    # 'with' closes the file even if a write raises (the original leaked
    # the handle on error).
    with open(filepath, 'w') as f:
        for line in lines:
            f.write(getPrintUnicode(line) + '\n')
    print('%s saved' % filepath)
    return
def saveimg(soup):
    """Download a gist's image into ./<title>/<data-path>.

    Builds the local path the same way saveText does (title from the page
    <title>, relative path from the breadcrumb div's 'data-path'), fetches
    the image through the module-level opener and writes the raw bytes.
    """
    title = getunicode(soup.find('title')).split()[-3]
    div = soup.find("div", {"class": "breadcrumb", "data-path": True})
    data_path = os.path.join(os.getcwd(), title, div['data-path'])
    path, filename = os.path.split(os.path.split(data_path)[0])
    makeDir(path)
    filepath = os.path.join(path, filename)
    img = soup.find("div", {"class": "image"})
    url = "".join(["https://github.com", img.find("img")["src"]])
    page = opener.open(url)
    try:
        # with + finally: close both the output file and the HTTP response
        # even on failure (the original leaked both if read/write raised).
        with open(filepath, "wb") as f:
            f.write(page.read())
    finally:
        page.close()
    # Report success only after the bytes are on disk — the original
    # printed 'saved' before the download even started.
    print("%s saved" % filepath)