def getWebpage(link='', dataDir='webpages', timeSleep=0, cookies='', reLoad=False, debug=False, read=True, referer='', info='', retry_num=10): link = link.strip() if link == '': return createPath(dataDir) fname = combinePath(dataDir, toFname(cookies + link + info)) if not reLoad: try: f = open(fname, 'r') page = f.read() f.close() if debug: print 'read from cached file' return page except: pass if debug: print 'reading from web' time.sleep(timeSleep) for i in range(retry_num): try: page_info = urllib2.build_opener() page_info.addheaders = [('User-Agent', 'safari/536.25'), ('Cookie', cookies), ('Referer', referer)] page = page_info.open(link) if read: try: page = page.read() except: print 'error reading page, try again (until trying time reach 10)' print link continue break except (urllib2.HTTPError, urllib2.URLError), e: try: print e.code, except: pass page = '' time.sleep(timeSleep)
def getWebpage(link='', dataDir='webpages', timeSleep=0, cookies='', reLoad=False, debug=False, read=True,referer='', info='',retry_num=10): link=link.strip() if link=='': return createPath(dataDir) fname=combinePath(dataDir,toFname(cookies+link+info)) if not reLoad: try: f=open(fname,'r') page=f.read() f.close() if debug: print 'read from cached file' return page except: pass if debug: print 'reading from web' time.sleep(timeSleep) for i in range(retry_num): try: page_info = urllib2.build_opener() page_info.addheaders = [('User-Agent', 'safari/536.25'), ('Cookie', cookies), ('Referer',referer) ] page = page_info.open(link) if read: try: page=page.read() except: print 'error reading page, try again (until trying time reach 10)' print link continue break except (urllib2.HTTPError,urllib2.URLError), e: try: print e.code, except: pass page='' time.sleep(timeSleep)
# NOTE(review): interior of an image-download loop -- the enclosing 'def'
# header is not visible in this chunk.  'album_name' (target directory),
# 'info' (iterable of (caption, image-url) pairs) and 'type' (site name,
# 'douban' or 'renren'; it shadows the builtin 'type') presumably come from
# the enclosing function's parameters -- confirm against the full file.
createPath(album_name)
i=0
#print len(info)
for name,url in info:
    i+=1
    # Strip a parenthesised suffix from the caption, e.g. "title (3)" -> "title ".
    # ind>1 (not >=0) keeps captions that merely *start* with '(' intact.
    ind=name.find('(')
    if ind>1:
        name=name[:ind]
    cap_content=name  # NOTE(review): never used below in this chunk -- dead store?
    # Sanitise the caption into a file-name-safe token: drop '/', collapse
    # whitespace runs, then use '_' as the word separator.
    name=name.replace('/','')
    name=' '.join(name.split())
    name=name.replace(' ','_')
    # File name is "<index>_<caption>.jpg", or just "<index>.jpg" when the
    # caption sanitised down to nothing.
    if name!='':
        fname=str(i)+'_'+name
    else:
        fname=str(i)
    name=combinePath(album_name,fname)+'.jpg'
    try:
        urllib.urlretrieve(url,name)
    except:
        # Best-effort: log the failing (url, path) pair and move on.
        print url,name
        continue
    # A very small file (<1000 bytes) is treated as a placeholder/error
    # image; retry once using the site's alternative image-size URL scheme.
    if os.stat(name).st_size<1000:
        if type=='douban':
            url=url.replace('large','photo')
        if type=='renren':
            url=url.replace('original','large')
        try:
            urllib.urlretrieve(url,name)
        except:
            print url, name
            continue  # NOTE(review): redundant -- already the last statement of the loop body