import os
import codecs

# folderPath and sysPath are this repo's path helpers; importing both from
# the sysPath module is an assumption (only sysPath's home is shown elsewhere)
from sysPath import folderPath, sysPath


def combineTXT(folder, des='combine.txt'):
    """Concatenate every .txt file under `folder` into one gb18030 file."""
    folder = folderPath(folder)
    des = sysPath(des)
    g = codecs.open(des, 'w', 'gb18030')
    for root, dirs, files in os.walk(folder):
        for f in files:
            if f.endswith('.txt'):
                add = sysPath(root + '/' + f)
                fi = codecs.open(add, 'r', 'gb18030')
                g.write(fi.read())
                fi.close()
    g.close()
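# combineTXT leans on two helpers from the repo's sysPath module that are
# not reproduced in this excerpt. A minimal stand-in, assuming folderPath
# only guarantees a trailing slash and sysPath only swaps '/' for the host
# separator, might look like this (both behaviors are guesses from the
# call sites above, not the repo's actual implementation):

import os

def folderPath(p):
    # assumed behavior: ensure the directory path ends with a separator
    return p if p.endswith('/') else p + '/'

def sysPath(p):
    # assumed behavior: convert '/'-separated paths to the host convention
    return p.replace('/', os.sep)

# usage: merge every .txt under novels/ into a single file
# combineTXT('novels', des='all_novels.txt')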
import os
import datetime

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup

from getWebpage import getWebpage
from sysPath import sysPath
# getRevId, numImage, contentLen and getTraffic are repo helpers whose
# import lines are not part of this excerpt


def genTable(filename='../../testData/testingMonuments.txt',
             outfname='../../testData/testingMonumentsData_week4_all.csv',
             months=None, yearBegin=2009, yearEnd=2015, silent=True,
             endLine=None, testNow=False, country='en'):
    now = datetime.datetime.now()
    now = (int(now.year), int(now.month))
    # default: every complete month from yearBegin up to (but not past) now
    if months is None:
        months = []
        for year in range(yearBegin, yearEnd):
            for month in range(1, 13):
                if (year, month) >= now:
                    break
                months.append('%d%02d' % (year, month))
    months = map(str, months)

    filename = sysPath(filename)
    f = open(filename, 'r')
    links = f.read().splitlines()
    f.close()
    # soup = BeautifulSoup(links)

    # header: one (Img, Content, Traffic) column triple per month;
    # note the .csv output is actually tab-delimited
    titleLine = ['linkTitle']
    for month in months:
        titleLine.append('Img' + month)
        titleLine.append('Content' + month)
        titleLine.append('Traffic' + month)

    # resume support: if the output already exists, count its rows so a
    # rerun skips the articles processed before the interruption
    if not os.path.exists(outfname):
        outf = open(outfname, 'w')
        outf.write('\t'.join(titleLine) + '\n')
        start = 0
        outf.close()
    else:
        outf = open(outfname, 'r')
        start = len(outf.read().splitlines())
        outf.close()

    count = 0
    ## for field in soup.findAll('a')[:endLine]:
    for linkTitle in links:
        index = linkTitle.find('/wiki/')
        if index != -1:
            linkTitle = linkTitle[index + 6:]
        count += 1
        if count < start:
            continue
        ## if not field.has_key('title'): continue
        ## linkTitle = field['href'][6:]
        ## officialTitle = field['title']
        curLine = [linkTitle]
        for month in months:
            date = month + '01'
            # six trailing zeros stand in for hour, minute, second
            revId = getRevId(linkTitle, date + '000000', silent=silent,
                             country=country)
            if not silent: print 'revId=', revId
            if revId is None:
                # the article did not exist that month
                curLine += ['', '', '']
                continue
            link = ('http://' + country +
                    '.wikipedia.org/w/index.php?oldid=' + revId)
            if testNow:
                print 'title=', linkTitle, 'link=', link, 'month=', month
            if not silent: print 'prepare'
            page = getWebpage(link, timeSleep=0.5, silent=silent)
            if not silent: print 'got page'
            soup = BeautifulSoup(page)
            if not silent: print 'got soup'
            numImg = numImage(soup)
            if not silent: print 'got num'
            conLen = contentLen(soup)
            if not silent: print 'got len'
            traffic = str(getTraffic(linkTitle, month, silent=silent,
                                     country=country))
            if not silent: print 'got history'
            curLine += [numImg, conLen, traffic]
        curLine = map(str, curLine)
        # append and close per article so a crash loses at most one row
        outf = open(outfname, 'a')
        outf.write('\t'.join(curLine) + '\n')
        outf.close()
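# getRevId, numImage, contentLen and getTraffic are defined elsewhere in
# the repo. As an illustration only: a lookup like getRevId can be built
# on the public MediaWiki API, which, queried with rvdir=older, returns
# the newest revision at or before the given timestamp. Everything below
# (names included) is a sketch under that assumption, not the repo's code.

import json
import urllib

def getRevId_sketch(title, timestamp, silent=True, country='en'):
    """Newest revision id at or before `timestamp` (YYYYMMDDHHMMSS)."""
    url = ('http://' + country + '.wikipedia.org/w/api.php'
           '?action=query&prop=revisions&rvprop=ids&rvlimit=1'
           '&rvdir=older&format=json'
           '&rvstart=' + timestamp +
           '&titles=' + urllib.quote(title))
    data = json.loads(urllib.urlopen(url).read())
    page = data['query']['pages'].values()[0]
    if 'revisions' not in page:
        return None  # the article did not exist yet at that date
    return str(page['revisions'][0]['revid'])

def numImage_sketch(soup):
    # plausible reading of numImage: count <img> tags in the revision
    return len(soup.findAll('img'))

def contentLen_sketch(soup):
    # plausible reading of contentLen: length of the page's visible text
    return len(soup.getText())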
'''
this program needs to add wait time, may cause problem with your renren id
'''
from getWebpage import getWebpage
import re
import json, time
from sysPath import createFile, sysPath

try:
    from BeautifulSoup import BeautifulSoup, SoupStrainer
except ImportError:
    from bs4 import BeautifulSoup, SoupStrainer  # beta version of bs

# session cookie copied from a logged-in browser
coo = 'datr=1HSWUNG14Cr81JphyUZWTl2i; lu=gAff9sJJ2_wuev5W3zxFsGZA; sub=128; p=49; c_user=1216615221; csm=2; fr=0regP7HiBNucJQa1n.AWVfvGNhos7mlakT0e52olU2aWo.BQlnT_.nT.AWVtovRV; s=Aa7LrP8dIAOi4SoX; xs=3%3ArXa_AglvHBTByg%3A2%3A1352037631; act=1356128659553%2F6%3A2; presence=EM356128936EuserFA21216615221A2EstateFDsb2F0Et2F_5b_5dElm2FnullEuct2F135610056B0EtrFA2loadA2EtwF1698182903EatF1356128697024G356128936322CEchFDp_5f1216615221F8CC; wd=1280x299'

f = open(sysPath('webpages/ids.txt'))
jf = json.loads(f.read().decode('utf8', 'ignore'))
f.close()

# write a Google Contacts style CSV header
createFile('infos_fb.txt', force=True)
g = open('infos_fb.txt', 'a')
g.write('Name,Given Name,Additional Name,Family Name,Yomi Name,Given Name Yomi,Additional Name Yomi,Family Name Yomi,Name Prefix,Name Suffix,Initials,Nickname,Short Name,Maiden Name,Birthday,Gender,Location,Billing Information,Directory Server,Mileage,Occupation,Hobby,Sensitivity,Priority,Subject,Notes,Group Membership,E-mail 1 - Type,E-mail 1 - Value,E-mail 2 - Type,E-mail 2 - Value,Phone 1 - Type,Phone 1 - Value' + '\n')
g.close()

ans = []
for f in jf['data']:
    # fetch each friend's profile page with the saved session cookie
    info = getWebpage('http://www.facebook.com/' + str(f['id']),
                      cookies=coo, info=str(f['id']))
    bI = BeautifulSoup(info)
    link = bI.find('link', {'rel': 'alternate'})
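# The header comment above says the script still needs a wait time between
# requests. One way to add it is a small wrapper around getWebpage that
# sleeps after each fetch; politeGet and its parameters are hypothetical,
# a sketch rather than the repo's fix (getWebpage's own timeSleep
# parameter, seen in genTable, may be the intended mechanism instead).

import random
import time

from getWebpage import getWebpage

def politeGet(url, cookies, info, minWait=2.0, maxWait=5.0):
    # fetch a page, then pause a random 2-5 s so consecutive requests
    # do not hammer the server or trip rate limits on the account
    page = getWebpage(url, cookies=cookies, info=info)
    time.sleep(minWait + (maxWait - minWait) * random.random())
    return page

# usage inside the loop above:
# info = politeGet('http://www.facebook.com/' + str(f['id']),
#                  cookies=coo, info=str(f['id']))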