Example #1
0
def _find_comment_list(driver: WebDriver):
    """Return the page element that contains the comment blocks.

    The second ``div.comment_detail_list`` under ``#divCtripComment`` is
    preferred. If fewer than two are found, the implicit wait is raised to
    5 seconds and the lookup is repeated; if there are still fewer than two,
    the first match is returned via ``find_element_by_css_selector`` (which
    raises if there is none at all).
    """
    selector = '#divCtripComment > div.comment_detail_list'
    containers = driver.find_elements_by_css_selector(selector)
    if len(containers) < 2:
        # Presumably the page has not finished rendering; wait longer and
        # look again. NOTE: the longer implicit wait stays set on the driver.
        driver.implicitly_wait(5)
        containers = driver.find_elements_by_css_selector(selector)
    if len(containers) >= 2:
        return containers[1]
    return driver.find_element_by_css_selector(selector)


def _store_comments_count(driver: WebDriver, hotel, hid: str) -> None:
    """Best-effort: read the comment total from the page and save it on ``hotel``.

    Any failure is logged and otherwise ignored so that scraping the
    comments themselves can continue.
    """
    try:
        comment_text = driver.find_element_by_css_selector(
            "#commentTab > a").text
        logging.warning("\n%s\n", comment_text)

        match = RE_COMMENT.search(comment_text)
        if match is None:
            logging.warning("no comment count found in %r for hotel %s",
                            comment_text, hid)
            return

        hotel.comments_count = int(match.group())
        logging.warning("\n%s\n", hotel.comments_count)
        hotel.save()
    except Exception:
        # Deliberately broad: the count is optional metadata and must not
        # abort the scrape, but the failure should still be visible.
        logging.warning("could not update comments_count for hotel %s",
                        hid, exc_info=True)


def spider_comments(driver: WebDriver, hid: str, n: int) -> int:
    """Scrape page ``n`` of the comments of hotel ``hid`` and store new ones.

    The driver is neither closed nor quit here; the caller keeps ownership.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        hid: Hotel identifier, used in the URL and stored on each comment.
        n: Page number of the comment listing.

    Returns:
        ``0`` when the page was processed, or was skipped because 15
        comments are already stored for it;
        ``403`` when loading the page failed;
        ``1`` when a comment found on this page is already stored under a
        different page number.
    """
    # Presumably the site lists 15 comments per page, so a page with 15
    # stored comments is treated as already scraped -- TODO confirm.
    full_page_size = 15
    stored = Comment.objects.filter(hotel=hid).filter(page=n).count()
    if stored == full_page_size:
        return 0

    try:
        driver.get('%s/dianping/%s_p%dt0.html' % (ROOT_URL, hid, n))
        driver.implicitly_wait(0.5)
    except (ConnectionRefusedError, urllib.error.URLError,
            ConnectionResetError, TypeError, AttributeError):
        return 403

    comment_list = _find_comment_list(driver)

    # Fetch at most two rows: enough to tell "exactly one hotel" apart from
    # "none" or "several" with a single query.
    hotels = list(Hotel.objects.filter(hid=hid)[:2])
    if len(hotels) == 1 and hotels[0].comments_count == 0:
        _store_comments_count(driver, hotels[0], hid)

    for comment in comment_list.find_elements_by_class_name('comment_block'):
        try:
            name = comment.find_element_by_class_name(
                'name').find_element_by_tag_name('span').text
            cid = comment.get_attribute('data-cid')
            points = comment.find_element_by_class_name('n').text
            room_type = comment.find_element_by_class_name('room_link').text
            content = comment.find_element_by_class_name(
                'J_commentDetail').text.strip()
        except Exception:
            # A block missing any of the expected fields is skipped, but
            # leave a trace so systematic markup changes can be noticed.
            logging.debug("skipping unparsable comment block on page %s "
                          "of hotel %s", n, hid, exc_info=True)
            continue

        logging.info('%s\n%s\n%s\n%s\n%s\n%s\n',
                     hid, name, n, room_type, points, content)

        same_cid = Comment.objects.filter(cid=cid)
        if not same_cid.exists():
            Comment.objects.create(cid=cid,
                                   content=content,
                                   hotel=hid,
                                   page=n,
                                   points=points,
                                   room_type=room_type,
                                   name=name)
        elif same_cid.exclude(page=n).exists():
            # The comment is already stored under another page number;
            # stop and report it to the caller.
            return 1

    return 0