Exemple #1
0
def saveText(soup):
    """Save the text lines of a parsed page to a file below the working directory.

    The destination is derived from two pieces of the page: the
    ``data-path`` attribute of the first ``div`` that has both ``class`` and
    ``data-path`` attributes, and the third-from-last whitespace-separated
    word of the ``<title>`` text (as returned by ``getunicode``). The file
    ends up at ``<cwd>/<title word>/<data-path minus its last component>``.

    Args:
        soup: Parsed document exposing ``find`` / ``findAll``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        None. The saved path is reported on standard output.

    Raises:
        IndexError: If the title text has fewer than three words.
    """
    data_path = soup.find('div', {'class': True, 'data-path': True})['data-path']
    # The third-from-last word of the title names the top-level output folder.
    title = getunicode(soup.find('title')).split()[-3]
    data_path = os.path.join(os.getcwd(), title, data_path)
    # The inner split discards the final component of data_path (an empty
    # one if the attribute ends with a separator); the outer split then
    # separates the target directory from the file name.
    path, filename = os.path.split(os.path.split(data_path)[0])
    makeDir(path)
    filepath = os.path.join(path, filename)
    lines = soup.findAll('div', {'class': 'line'})
    # The context manager closes the file even if a line fails to convert
    # or a write raises.
    with open(filepath, 'w') as f:
        f.writelines(getPrintUnicode(line) + '\n' for line in lines)
    # Single-argument form prints identically on Python 2 and Python 3.
    print('%s saved' % filepath)
Exemple #2
0
def saveimg(soup):
    """Download the image shown on a parsed page and save it below the working directory.

    The destination is derived from the ``data-path`` attribute of the
    ``div`` with class ``breadcrumb`` and the third-from-last
    whitespace-separated word of the ``<title>`` text (as returned by
    ``getunicode``). The file ends up at
    ``<cwd>/<title word>/<data-path minus its last component>``.

    The image URL is the ``src`` of the first ``img`` inside the ``div``
    with class ``image``, prefixed with ``https://github.com``. It is
    fetched through the module-level ``opener``.

    Args:
        soup: Parsed document exposing ``find``
            (presumably a BeautifulSoup object -- confirm against caller).

    Returns:
        None. The saved path is reported on standard output once the file
        has been written.

    Raises:
        IndexError: If the title text has fewer than three words.
    """
    # The third-from-last word of the title names the top-level output folder.
    title = getunicode(soup.find('title')).split()[-3]
    div = soup.find("div", {"class": "breadcrumb", "data-path": True})
    data_path = os.path.join(os.getcwd(), title, div['data-path'])
    # The inner split discards the final component of data_path (an empty
    # one if the attribute ends with a separator); the outer split then
    # separates the target directory from the file name.
    path, filename = os.path.split(os.path.split(data_path)[0])
    makeDir(path)
    filepath = os.path.join(path, filename)

    img = soup.find("div", {"class": "image"})
    # NOTE(review): assumes src is a site-relative path -- confirm.
    url = "https://github.com" + img.find("img")["src"]

    # Fetch before opening the output file so a failed download does not
    # leave behind an empty, truncated file.
    content = opener.open(url).read()
    with open(filepath, "wb") as f:
        f.write(content)
    # Report only after the data is on disk; the single-argument form prints
    # identically on Python 2 and Python 3.
    print('%s saved' % filepath)