Esempio n. 1
0
def getCommentById(id):
    """Fetch every comment page for POI *id* and append each comment as a
    CSV row to comments.csv.

    The total page count is unknown until the first response arrives, so
    pagecount starts at -1 and is read from the first page's DOM.
    """
    url = getAbsPath('/poi/__pagelet__/pagelet/poiCommentListApi')

    pagecount = -1  # sentinel: real count comes from the first response
    pagenum = 1

    # Context manager replaces the manual try/finally close.
    with open('comments.csv', 'a', encoding='utf-8') as file:
        writer = None
        while True:
            print('Comment page ' + str(pagenum))
            data = geneCommData(id, pagenum)
            html = requestByGet(url, isJson=True, data=data)['data']['html']
            dom = loadDom(html)

            if pagecount < 0:  # first request: learn the total page count
                pagecount = getPageCount(dom)

            for comment in dom.find_all('li', class_='rev-item comment-item clearfix'):
                com = getCommentDetails(comment, id)
                if writer is None:  # create the writer lazily so the header matches the keys
                    writer = getCsvWriter(file, list(com.keys()))
                writer.writerow(com)

            pagenum += 1
            if pagenum > pagecount:
                break
Esempio n. 2
0
def getAround(node):
    """Scrape the "nearby POIs" list referenced by *node* and append one
    CSV row per neighbouring POI to around.csv.

    node: DOM element carrying a 'data-params' attribute (JSON containing
    poi_id). Does nothing when node is falsy.
    """
    if not node:
        return
    params = node['data-params']
    id = json.loads(params)['poi_id']
    data = {'params': params}
    url = getAbsPath('/poi/__pagelet__/pagelet/poiLocationApi')
    html = requestByGet(url, isJson=True, data=data)['data']['html']
    dom = loadDom(html)

    # Context manager replaces the manual try/finally close; the loop
    # variable is renamed so it no longer shadows the 'node' parameter.
    with open('around.csv', 'a', encoding='utf-8') as file:
        writer = None
        for item in dom.find_all('li'):
            dic = dict()
            dic['id'] = id
            dic['aid'] = item['data-id']
            dic['aname'] = item['data-name']
            dic['atype'] = item['data-type']
            dic['dist'] = getValue(item.find('span'))
            if writer is None:  # create the writer lazily so the header matches the keys
                writer = getCsvWriter(file, dic.keys())
            writer.writerow(dic)
Esempio n. 3
0
def getPoiDescription(url, dic):
    """Fetch the POI page at *url*, store its summary text under
    dic['description'], and — when a summary exists — merge the page's
    overview section into *dic* as well."""
    dom = loadDom(requestByGet(getAbsPath(url)))
    description = getValue(findByClass(dom, 'summary'))
    dic['description'] = description
    if description != '':
        getOverview(dom.find(attrs={'data-anchor': 'overview'}), dic)
Esempio n. 4
0
def getReview(uid):
    """Request up to 500 reviews for user *uid* and return the parsed
    JSON response.

    Fix: the original fetched the response and silently discarded it
    (implicitly returning None); returning the data makes the function
    usable while remaining backward compatible for callers that ignored
    the result.
    """
    url = getAbsPath('/home/ajax_review.php')
    data = {'act': 'loadList',
            'filter': 0,
            'offset': 0,
            'limit': 500,
            'uid': uid,
            'sort': 1}
    return requestByGet(url, isJson=True, data=data)
Esempio n. 5
0
def getDetail(url):
    """Download the POI page at *url* and return a dict populated with
    its overview and comment data."""
    details = dict()
    dom = loadDom(requestByGet(getAbsPath(url)))

    getOverview(dom.find(attrs={'data-anchor': 'overview'}), details)
    getComments(dom.find(attrs={'data-anchor': 'commentlist'}), details)

    return details
Esempio n. 6
0
def collectComment():
    """Worker loop: repeatedly pop a (scenic, params) entry from the
    shared comDic and append every comment page for that POI to
    comments.csv. Exits when comDic is empty.

    Fixes: the lock was released outside any try/finally (leaked if
    popitem raised); the loop only terminated via an uncaught KeyError;
    and the file was opened inside the try, so a failed open made the
    finally clause raise NameError.
    """
    with open('comments.csv', 'w', encoding='utf-8') as file:
        writer = None
        while True:
            # 'with lock' guarantees release even if popitem raises.
            with lock:
                try:
                    item = comDic.popitem()
                except KeyError:
                    break  # no work left: exit cleanly instead of crashing

            scenic, params = item
            id = json.loads(params)['poi_id']
            print(scenic, ' ', id)

            url = getAbsPath('/poi/__pagelet__/pagelet/poiCommentListApi')

            pagecount = -1  # sentinel: real count comes from first response
            pagenum = 1

            while True:
                print('Comment page ' + str(pagenum))
                data = geneCommentPageData(params, pagenum)
                html = requestByGet(url, isJson=True, data=data)['data']['html']
                dom = loadDom(html)
                if pagecount < 0:  # first request for this POI
                    pagecount = getPageCount(dom)
                    # NOTE(review): 'dic' is not defined in this function —
                    # presumably a module-level global; verify, otherwise
                    # this line raises NameError.
                    getCommentDevide(dom, dic)

                for comment in dom.find_all('li', class_='rev-item comment-item clearfix'):
                    com = getCommentDetails(comment, id)
                    if writer is None:  # lazy writer so header matches keys
                        writer = getCsvWriter(file, list(com.keys()))
                    writer.writerow(com)
                pagenum += 1
                if pagenum > pagecount:
                    break
Esempio n. 7
0
def getUsersThread(writer, uid):
    """Fetch all review pages (40 items per page) for user *uid* and
    write one CSV row per reviewed POI through *writer*.

    Bug fixed: the original incremented `offset` once per POI *inside*
    the item loop, so after a full 40-item page the offset jumped by
    40*40 and pages were skipped. The increment belongs at page level
    (compare getUsers, where it is placed correctly).
    """
    url = getAbsPath('/home/ajax_review.php')
    offset = 0
    hasmore = True
    while hasmore:
        data = geneUserData(uid, offset)
        data = requestByGet(url, isJson=True, data=data)['data']
        hasmore = data['hasmore'] == 'true'
        dom = loadDom(data['html'])
        for node in dom.find_all(class_=re.compile('poi-item')):
            dic = dict()
            dic['uid'] = uid
            dic['poi_href'] = findByClass(node, 'cover').find('a')['href']
            dic['poi_id'] = extractNum(dic['poi_href'])
            dic['name'] = getValue(findByClass(node, 'title'))
            dic['star'] = findByClass(node, 'rating')['data-star']
            dic['content'] = getValue(findByClass(node, 'poi-rev _j_comment'))
            dic['datetime'] = getValue(findByClass(node, 'time'))
            getPoiDescription(dic['poi_href'], dic)
            writer.writerow(dic)
        offset += 40  # advance one page (server pages are 40 reviews)
Esempio n. 8
0
def getComments(node, dic):
    """Record the comment-score breakdown for a POI into *dic*.

    node: DOM element whose inner <div> carries a JSON 'data-params'
    attribute (including poi_id). Stores the id under dic['id'], fetches
    the first comment page, and lets getCommentDevide() add the rating
    breakdown. Does nothing when node is falsy.

    Dead code removed: the original while-loop broke unconditionally
    after the first page, so the pagination logic after the break was
    unreachable (and mostly commented out already).
    """
    if not node:
        return
    params = node.find('div')['data-params']
    id = json.loads(params)['poi_id']
    dic['id'] = id
    url = getAbsPath('/poi/__pagelet__/pagelet/poiCommentListApi')

    # The open is kept for its side effect of creating comments.csv,
    # matching the original open('a') / close pair; nothing is written.
    with open('comments.csv', 'a', encoding='utf-8'):
        data = geneCommentPageData(params, 1)  # first (and only) page
        html = requestByGet(url, isJson=True, data=data)['data']['html']
        dom = loadDom(html)
        getCommentDevide(dom, dic)
Esempio n. 9
0
def getUsers():
    """Read user ids from leftusers.txt and write every POI each user
    reviewed (including its description) as CSV rows to userpois.csv.

    Pages through each user's reviews 40 at a time until the server
    reports hasmore == 'false'.
    """
    # Context managers replace the manual try/finally double close.
    with open('leftusers.txt', 'rt', encoding='utf-8') as file, \
         open('userpois.csv', 'wt', encoding='utf-8') as destfile:
        writer = None
        url = getAbsPath('/home/ajax_review.php')

        for user in file.readlines():
            uid = extractNum(user)
            print('user ', user)
            offset = 0
            hasmore = True
            while hasmore:
                data = geneUserData(uid, offset)
                data = requestByGet(url, isJson=True, data=data)['data']
                hasmore = data['hasmore'] == 'true'
                dom = loadDom(data['html'])
                for node in dom.find_all(class_=re.compile('poi-item')):
                    dic = dict()
                    dic['uid'] = uid
                    dic['poi_href'] = findByClass(node, 'cover').find('a')['href']
                    dic['poi_id'] = extractNum(dic['poi_href'])
                    dic['name'] = getValue(findByClass(node, 'title'))
                    dic['star'] = findByClass(node, 'rating')['data-star']
                    dic['content'] = getValue(findByClass(node, 'poi-rev _j_comment'))
                    dic['datetime'] = getValue(findByClass(node, 'time'))
                    getPoiDescription(dic['poi_href'], dic)

                    if writer is None:  # lazy writer so header matches keys
                        writer = getCsvWriter(destfile, dic.keys())
                    writer.writerow(dic)
                offset += 40  # next page of 40 reviews
Esempio n. 10
0
def geneDomFromUrl(url):
    """Download *url* and return its parsed DOM."""
    return geneDom(requestByGet(url))