#!/usr/bin/env python
# coding=utf-8
from spider import Spider

# Reads owner ids one per line from the work file and, for each id, fetches
# that owner's "pinglun" page and inspects its <ul class="comment_right">
# element.  NOTE(review): Spider is a project-local helper; the exact
# semantics of setworkdir/setfilename/getfilename/getSoup should be
# confirmed against its definition.
spider = Spider()
spider.setworkdir('/data/work/ys/oriinfo/ownerinfo/')
spider.setfilename('owneridlist.txt')

f = open(spider.getfilename(), 'r+')
while True:
    # Give every section its OWN dict.  The previous chained assignment
    # (a = b = c = {}) bound all four keys to one shared object, so a write
    # to dic['allComments'] also showed up under 'diary', 'information'
    # and 'order'.
    dic = {}
    dic['diary'] = {}
    dic['information'] = {}
    dic['allComments'] = {}
    dic['order'] = {}

    line = f.readline()
    if not line:
        # readline() returns an empty string only at end of file.
        break
    # Strip just the line terminator; a blind line[:-1] would eat the last
    # character of an id when the final line has no trailing newline.
    line = line.rstrip('\n')
    # Parenthesised single-argument print behaves the same as the Python 2
    # print statement and also parses under Python 3.
    print(line)

    soup = spider.getSoup('http://www.xiaozhu.com/fangdong/' + line + '/pinglun.html')
    ul = soup.find('ul', {'class': 'comment_right'})

    dic['allComments']['rate'] = {}
    item = ['sanitationRate', 'descriptionRate', 'performanceRate', 'securityRate', 'locationRate']
    if ul is None:
        # No rating list on the page: flag it and fill every rate with the
        # 'NULL' placeholder string.
        dic['nohtml'] = True
        for i in item:
            dic['allComments']['rate'][i] = 'NULL'
        dic['allComments']['rate']['allcommentRate'] = 'NULL'
    else:
        dic['nohtml'] = False
        liAll = ul.findAll('li')
        cot = 0
        for li in liAll:
            print(li)
            # The value sits on the 'value' attribute of the <em> nested
            # inside the first <span> of each <li>.
            # NOTE(review): cot and grade are not used within the visible
            # excerpt; confirm against the remainder of this loop body.
            grade = li.find('span').find('em').get('value')