def saveText(soup):
    """Save the text lines of a parsed page to a file under the working directory.

    The destination is built from the current working directory, a token
    taken from the page ``<title>``, and the ``data-path`` attribute of the
    first ``div`` that carries both ``class`` and ``data-path`` attributes.
    Every ``div`` with class ``line`` is written as one line of output.

    Args:
        soup: parsed page exposing ``find`` / ``findAll`` (presumably a
            BeautifulSoup document -- confirm against the caller).

    Returns:
        None. The file is written as a side effect and its path is printed.

    Raises:
        TypeError / IndexError: if the expected ``div`` is missing or the
            title has fewer than three whitespace-separated tokens.
    """
    data_path = soup.find('div', {'class': True, 'data-path': True})['data-path']

    # Third-from-last whitespace-separated token of the title; presumably the
    # repository name -- TODO confirm against the page layout being scraped.
    title = getunicode(soup.find('title')).split()[-3]

    data_path = os.path.join(os.getcwd(), title, data_path)
    # The inner split drops the last path component (empty when data_path
    # ends with a separator); the outer split separates directory and name.
    path, filename = os.path.split(os.path.split(data_path)[0])
    makeDir(path)
    filepath = os.path.join(path, filename)

    lines = soup.findAll('div', {'class': 'line'})
    # `with` guarantees the handle is closed even if converting or writing a
    # line raises part-way through.
    with open(filepath, 'w') as f:
        for line in lines:
            # write(), not writelines(): each item is a single string, and
            # writelines() would iterate over it character by character.
            f.write(getPrintUnicode(line) + '\n')

    # Single pre-formatted argument so the output is the same whether print
    # is the Python 2 statement or the Python 3 function.
    print("%s saved" % filepath)
    return
def saveimg(soup):
    """Download the image shown on a parsed page and save it locally.

    The destination is built from the current working directory, a token
    taken from the page ``<title>``, and the ``data-path`` attribute of the
    ``div`` with class ``breadcrumb``. The image URL is the ``src`` of the
    first ``img`` inside the ``div`` with class ``image``, prefixed with
    ``https://github.com``.

    Args:
        soup: parsed page exposing ``find`` (presumably a BeautifulSoup
            document -- confirm against the caller).

    Returns:
        None. The image is written as a side effect and its path is printed.

    Raises:
        TypeError / AttributeError / IndexError: if an expected element is
            missing or the title has fewer than three tokens.
        Whatever ``opener.open`` raises when the download fails.
    """
    # Third-from-last whitespace-separated token of the title; presumably the
    # repository name -- TODO confirm against the page layout being scraped.
    title = getunicode(soup.find('title')).split()[-3]

    div = soup.find("div", {"class": "breadcrumb", "data-path": True})
    data_path = os.path.join(os.getcwd(), title, div['data-path'])
    # The inner split drops the last path component (empty when data_path
    # ends with a separator); the outer split separates directory and name.
    path, filename = os.path.split(os.path.split(data_path)[0])
    filepath = os.path.join(path, filename)

    img = soup.find("div", {"class": "image"})
    url = "".join(["https://github.com", img.find("img")["src"]])

    # Fetch before touching the filesystem so a missing tag or a failed
    # request does not leave an empty, truncated file behind.
    page = opener.open(url)
    try:
        content = page.read()
    finally:
        page.close()

    makeDir(path)
    with open(filepath, "wb") as f:
        f.write(content)

    # Report only after the bytes are on disk. Single pre-formatted argument
    # so the output is the same under the Python 2 print statement and the
    # Python 3 print function.
    print("%s saved" % filepath)