def _find_comment_list(driver: WebDriver):
    """Return the element that holds the comment blocks of the loaded page.

    The second ``div.comment_detail_list`` under ``#divCtripComment`` is
    preferred.  If fewer than two are present, the lookup is retried with a
    longer implicit wait, and finally falls back to the first match.  Any
    exception raised by that last ``find_element`` call propagates.
    """
    selector = '#divCtripComment > div.comment_detail_list'
    try:
        return driver.find_elements_by_css_selector(selector)[1]
    except IndexError:
        # NOTE: the longer implicit wait stays configured on the driver for
        # every later lookup; nothing in this function resets it.
        driver.implicitly_wait(5)
    try:
        return driver.find_elements_by_css_selector(selector)[1]
    except IndexError:
        return driver.find_element_by_css_selector(selector)


def _record_hotel_comment_count(driver: WebDriver, hid: str) -> None:
    """Best-effort: fill in ``Hotel.comments_count`` from the page when it is 0."""
    try:
        hotel = Hotel.objects.get(hid=hid)
    except (Hotel.DoesNotExist, Hotel.MultipleObjectsReturned):
        # Only act when exactly one hotel row matches this id.
        return
    if hotel.comments_count != 0:
        return

    try:
        comment_text = driver.find_element_by_css_selector(
            "#commentTab > a").text
        logging.warning("\n%s\n", comment_text)
        match = RE_COMMENT.search(comment_text)
        if match is None:
            logging.warning(
                "no comment count found in tab text for hotel %s", hid)
            return
        hotel.comments_count = int(match.group())
        logging.warning("\n%s\n", hotel.comments_count)
        hotel.save()
    except Exception:
        # The count is optional metadata; a failure here must not abort the
        # scrape, but it should be visible in the logs.
        logging.warning(
            "could not update comments_count for hotel %s", hid,
            exc_info=True)


def spider_comments(driver: WebDriver, hid: str, n: int) -> int:
    """Scrape page ``n`` of the comments for hotel ``hid`` and store new ones.

    Args:
        driver: Selenium driver used to load and query the page.
        hid: Hotel identifier, used in the page URL and stored on each comment.
        n: Page number of the comment listing.

    Returns:
        0 if the page was processed, or was skipped because 15 comments are
        already stored for this hotel and page.
        403 if loading the page raised one of the handled connection/driver
        errors.
        1 if a scraped comment's ``cid`` is already stored under a different
        page number (presumably meaning the listing has reached content that
        was already collected -- confirm against the caller).
    """
    comments_per_page = 15  # a page with this many stored rows is skipped
    stored_on_page = Comment.objects.filter(hotel=hid, page=n).count()
    if stored_on_page == comments_per_page:
        return 0

    try:
        driver.get('%s/dianping/%s_p%dt0.html' % (ROOT_URL, hid, n))
        driver.implicitly_wait(0.5)
    except (ConnectionRefusedError, urllib.error.URLError,
            ConnectionResetError, TypeError, AttributeError):
        return 403

    comment_list = _find_comment_list(driver)
    _record_hotel_comment_count(driver, hid)

    for comment in comment_list.find_elements_by_class_name('comment_block'):
        try:
            name = comment.find_element_by_class_name(
                'name').find_element_by_tag_name('span').text
            cid = comment.get_attribute('data-cid')
            points = comment.find_element_by_class_name('n').text
            room_type = comment.find_element_by_class_name('room_link').text
            content = comment.find_element_by_class_name(
                'J_commentDetail').text.strip()
        except Exception:
            # A block missing any expected field is skipped, not fatal.
            logging.debug(
                "skipping unparsable comment block for hotel %s page %s",
                hid, n, exc_info=True)
            continue

        logging.info('%s\n%s\n%s\n%s\n%s\n%s\n',
                     hid, name, n, room_type, points, content)

        stored = Comment.objects.filter(cid=cid)
        if not stored.exists():
            Comment.objects.create(cid=cid, content=content, hotel=hid,
                                   page=n, points=points,
                                   room_type=room_type, name=name)
        elif stored.exclude(page=n).exists():
            # Same comment id already recorded for another page: stop here.
            return 1

    return 0