Example #1
0
#!/usr/bin/env python
# coding=utf-8
from spider import Spider
# Crawler setup: configure the project's Spider helper with a work directory
# and a file name, then open the resulting file for the main loop.
# NOTE(review): getfilename() presumably resolves the name against the work
# directory - confirm in Spider.
spider = Spider()
spider.setworkdir('/data/work/ys/oriinfo/ownerinfo/')
spider.setfilename('owneridlist.txt')
owner_list_path = spider.getfilename()
f = open(owner_list_path, 'r+')
while True:
    dic = {}
    dic['diary'] = dic['information'] = dic['allComments'] = dic['order'] = {}
    line = f.readline()
    if not line:
        break
    line = line[:-1]
    print line
    soup = spider.getSoup('http://www.xiaozhu.com/fangdong/' + line + '/pinglun.html')
    ul = soup.find('ul',{'class':'comment_right'})
    dic['allComments']['rate'] = {}
    item = ['sanitationRate','descriptionRate','performanceRate','securityRate','locationRate']
    if ul == None:
        dic['nohtml'] = True
        for i in item:
            dic['allComments']['rate'][i] = 'NULL'
        dic['allComments']['rate']['allcommentRate'] = 'NULL'
    else:
        dic['nohtml'] = False
        liAll = ul.findAll('li')
        cot = 0
        for li in liAll:
            print li
            grade = li.find('span').find('em').get('value')