def main():
    """Write the `title` row, then run `search` for pages 1..`num`.

    A single pause length (6-13 seconds, inclusive) is drawn once and the
    same value is slept before every `search` call.

    Relies on names defined outside this function: `writer`, `title`,
    `district`, `file`, `num` and `search`.
    """
    writer(title, district, file, interval=False)

    # Drawn once up-front; every page request reuses the same delay.
    pause_seconds = random.randint(6, 13)

    # search() receives the page number as a string.
    for page_label in map(str, range(1, num + 1)):
        time.sleep(pause_seconds)
        search(page_label)
def getDetailInfo():
    """Write one row per review item for each of `num` pages.

    For every page the function waits until the `._j_commentlist` element is
    present, parses `browser.page_source` with PyQuery, and for each
    `.rev-list .rev-item` element writes `name_info + [rating, text, time]`
    through `writer`. After a page is processed it prints the page counter
    and calls `nextPage()`.

    Relies on names defined outside this function: `wait`, `EC`, `By`,
    `browser`, `pq`, `num`, `name_info`, `writer`, `district`, `file` and
    `nextPage`.
    """
    # Last character of the star element's class attribute -> rating label.
    star_labels = {
        '1': "很差",
        '2': "一般",
        '3': "好",
        '4': "很好",
        '5': "非常好",
    }
    comment_list_locator = (By.CSS_SELECTOR, '._j_commentlist')

    wait.until(EC.presence_of_element_located(comment_list_locator))
    for i in range(1, num + 1):
        # Wait again on every iteration: nextPage() is called at the end of
        # the previous one, so the list has to be present before parsing.
        wait.until(EC.presence_of_element_located(comment_list_locator))
        doc = pq(browser.page_source)

        for item in doc('.rev-list .rev-item').items():
            if item is None:
                print("item中没有信息")
                continue

            star_digit = str(item.find('.s-star').attr('class'))[-1]
            # Characters outside 1-5 are written through unchanged, exactly
            # as the former if/elif chain left them.
            title = star_labels.get(star_digit, star_digit)

            comment = item.find('.rev-txt').text()
            # Only the first 11 characters of the time text are kept.
            comment_time = item.find('.time').text()[:11]

            info = [title, comment, comment_time]
            title_info = name_info + info
            writer(title_info, district, file, interval=False)

        print(i)
        nextPage()
def getDetailInfo():
    """Write one row per review, prefixed with the page-level score fields.

    Steps:
      1. Wait for `#daohang`, then keep calling `showMoreInfo()` while it
         returns 2 and `num <= 16`, sleeping 2 seconds between calls.
      2. Parse `browser.page_source` with PyQuery and read the name plus the
         overall and five sub-scores from `.Comprehensive_score`.
      3. For every `#dpContentList .comm_list .comm_list_nr` element, read
         its five single-digit scores, their mean, the text and the first
         10 characters of the time, and write the combined row via `writer`.

    A review that cannot be parsed or written is skipped with a printed
    reason. Missing page-level scores still raise `IndexError`.

    Relies on names defined outside this function: `wait`, `EC`, `By`,
    `browser`, `pq`, `showMoreInfo`, `num`, `writer`, `district` and `file`.
    """
    # Raw strings keep the backslashes intact without relying on Python's
    # handling of invalid escape sequences; the patterns themselves are
    # unchanged.
    # NOTE(review): the '.' in the first two patterns is unescaped and so
    # matches any character - confirm whether a literal dot was intended.
    overall_fraction_re = re.compile(r'(.\d\d)')
    sub_score_re = re.compile(r'\d.\d\d')
    digit_re = re.compile(r'(\d)')

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#daohang')))
    time.sleep(1)
    while showMoreInfo() == 2 and num <= 16:
        time.sleep(2)
        print("in the more information")

    doc = pq(browser.page_source)
    print(doc)

    houses_name = doc('#xfxq_B03_01').attr('title')
    houses_first = doc('body .Comprehensive_score .mgt_2').text()
    houses_second = overall_fraction_re.findall(
        doc('body .Comprehensive_score .fbold14').text())
    houses_score = str(houses_first) + houses_second[0]

    # Run the sub-score pattern once instead of five times.
    sub_scores = sub_score_re.findall(
        doc('body .Comprehensive_score .font_gray').text())
    houses_price = sub_scores[0]
    houses_loc = sub_scores[1]
    houses_support = sub_scores[2]
    houses_trans = sub_scores[3]
    houses_env = sub_scores[4]
    houses_info = [houses_name, houses_score, houses_price, houses_loc,
                   houses_support, houses_trans, houses_env]

    items = doc('#dpContentList .comm_list .comm_list_nr').items()
    print('楼盘信息保存完毕')

    for item in items:
        if item is None:
            print("item中没有信息")
            continue
        try:
            list_score = item.find('.comm_list_score .inf').text()
            # Fewer than five digits makes the unpacking raise, which skips
            # the review just like the former out-of-range index did.
            # NOTE(review): positions 2 and 3 are read as trans/support
            # here but as support/trans for the page-level scores above -
            # confirm against the page layout.
            (comm_price, comm_loc, comm_trans,
             comm_support, comm_env) = digit_re.findall(list_score)[:5]
            comm_score = (int(comm_price) + int(comm_loc) + int(comm_trans)
                          + int(comm_support) + int(comm_env)) / 5
            comment = item.find('.comm_list_con').text()
            comment_time = item.find('.look_hou').text()[:10]
            comm_info = [comm_score, comm_price, comm_loc, comm_trans,
                         comm_support, comm_env, comment, comment_time]
            title_info = houses_info + comm_info
            writer(title_info, district, file, interval=False)
        except Exception as exc:
            # Still best-effort per review, but KeyboardInterrupt/SystemExit
            # now propagate and the reason for the skip is visible.
            print("skipping review:", repr(exc))
            continue
def getDetailInfo():
    """Write one row per `.main-review` element found on the current page.

    Waits for the review-list header link to be present, parses
    `browser.page_source` with PyQuery, and for each
    `#review-list .reviews-items .main-review` element writes
    `shop_info + [rating, text, time]` through `writer`, echoing the
    extracted fields to stdout.

    Relies on names defined outside this function: `wait`, `EC`, `By`,
    `browser`, `pq`, `shop_info`, `writer`, `district` and `file`.

    Returns:
        None.
    """
    # 7th-from-last character of the star element's class -> rating label.
    star_labels = {
        '1': "很差",
        '2': "一般",
        '3': "好",
        '4': "很好",
        '5': "非常好",
    }

    print('in the getDetailInfo')
    wait.until(
        EC.presence_of_element_located((
            By.CSS_SELECTOR,
            '#review-list > div.review-list-container > div.review-list-main > div.review-list-header > h1 > a',
        ))
    )
    html = browser.page_source
    print('got html')
    doc = pq(html)

    for item in doc('#review-list .reviews-items .main-review').items():
        if item is None:
            print("item中没有信息")
            continue

        try:
            star_digit = str(item.find('.sml-rank-stars').attr('class'))[-7]
        except IndexError:
            # Class string shorter than 7 characters (e.g. the element is
            # missing and attr() gave None): skip this review.
            continue
        # Characters outside 1-5 are written through unchanged.
        title = star_labels.get(star_digit, star_digit)

        comment = item.find('.review-words').text()
        comment_time = item.find('.time ').text()
        info = [title, comment, comment_time]
        title_info = shop_info + info
        writer(title_info, district, file, interval=False)
        print(info)
        print('\n')

    return None
def main():
    """Load `url` in the browser, write the `title` row, then scrape.

    Relies on names defined outside this function: `browser`, `url`,
    `writer`, `title`, `district`, `file` and `getDetailInfo`.
    """
    browser.get(url)
    writer(title, district, file, interval=False)
    # NOTE: the goComment() step is disabled here; scraping starts directly.
    getDetailInfo()