def oneOfSeries(urllist,path):
    """Process every page URL of one series.

    For each URL, the text between the last '/' and the '.html' suffix is
    taken as the page name, and ``meizi_page_download.picurl`` is called
    with the URL and ``path + '/' + <name>``.  Each URL that completes is
    reported through ``errorReport.success``.

    urllist -- iterable of page URL strings
    path    -- base directory string; the page name is appended to it

    An IndexError (no '.html' name found in the URL, or one raised while
    the page is being processed) is reported through
    ``errorReport.errorIndex`` and the loop moves on to the next URL.
    """
    # The dot is escaped so only a literal '.html' suffix matches; with a
    # bare '.', a URL such as '.../xhtml' matched and gave an empty name.
    searchname = r'.*/(.*?)\.html'
    for url in urllist:
        try:
            name = re.findall(searchname, url, re.S)
            current_path = path + '/' + name[0]
            meizi_page_download.picurl(url, current_path)
            errorReport.success(url)
        except IndexError:
            # Deliberately wide: keeps the original best-effort behaviour
            # of skipping a page on any IndexError instead of aborting.
            errorReport.errorIndex(url, searchname)
def save_pic(url,path):
    searchname = '.*/(.*?.jpg)'
    name = re.findall(searchname,url)
    filename = path +'/'+ name[0]
    
    #print filename + ':start'
    
    while True:
        if os.path.exists(filename):
            #os.remove(filename)
            errorReport.errorFileExist(filename)
            print filename,' exists, skip'
            return True
        elif os.path.exists(filename):
            os.mknod(filename)
        if download(url,filename):
            break
    print filename + ':over'
    errorReport.success(filename)
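# Example #3
# 0
# NOTE(review): the lines below start in the middle of a function; its
# `def`/`try` header is not present in this excerpt.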
        conn = urllib2.urlopen(url,data=None,timeout=5)
        html = conn.read()
        return html
    except Exception:
        errorReport.errorLoadUrl(url)
        return ''
def meizi(url,path):
    reTagContent = '<div.*?class="tags">.*?<span>(.*?)</span>'
    reTagUrl = '<a.*?href="(.*?)".*?>'
    print 'start open meiziwang'
    html = ''
    while True:
        html = loadurl(url)
        if html == '':
            print 'load', url,'error'
            continue
        else:
            break
    tagContent = re.findall(reTagContent, html, re.S)
    taglists = re.findall(reTagUrl, tagContent[0], re.S)
    taglists = sorted(list(set(taglists)))
    print 'open meiziwang over'
    #print len(taglists)
    for url in taglists:
        meizi_series_nextpage.nextpage(url,path)
        #meizi_series_getpage.tag_series(url,path)
        #print url
        
# Script entry: these statements run at module level (also on import).
# Start from the site's front page, passing a hard-coded directory as the
# path argument.
meizi('http://www.meizitu.com','/home/hus/Desktop/meizi')
# Record the overall run against the same directory via errorReport.
errorReport.success('/home/hus/Desktop/meizi')
print 'All over success and false has been report with txt file'