Exemple #1
0
def review_result_is_valid(hotel_id):
    """Check that every review listed for a hotel is stored and intact.

    Reads the hotel record, then every review id listed in it, and confirms
    that each stored review HTML contains a ``<div id="review_<rid>">``
    element.

    Args:
        hotel_id: Key passed to ``db.read_a_hotel``.

    Returns:
        True when the hotel's review count is zero or every listed review
        verifies. False when the hotel record is missing, the review count
        is smaller than the number of listed ids, or any review record is
        missing, has no HTML, or lacks its ``review_<rid>`` div.
    """
    with taDB(common.TA_DB) as db:
        record = db.read_a_hotel(hotel_id)
    if record is None:
        return False

    # Convert once so the zero check and the length comparison below use the
    # same integer value (the column may come back as a string).
    # NOTE(review): record[3] / record[4] are presumably the review count and
    # the serialized id list -- confirm against the table schema.
    rno = int(record[3])
    if rno == 0:
        logger.info('[hotel {}] PASSED: no reviews'.format(hotel_id))
        return True

    rids = ast.literal_eval(record[4])
    if rno < len(rids):
        return False

    with taDB(common.TA_DB) as db:
        for rid in rids:
            rrecord = db.read_a_review(rid)
            if rrecord is None:
                return False
            html = rrecord[1]
            if html is None:
                logger.info('[hotel {}] FAILED: HTML is absent'.format(hotel_id))
                return False
            rec_soup = common.load_soup_string(html)
            # literal_eval may produce integer ids; str() keeps the element
            # id lookup working for both int and str entries.
            if rec_soup.find('div', id='review_' + str(rid)) is None:
                # Dumps the offending HTML to stdout for inspection.
                print(html)
                logger.info('[hotel {}] FAILED: corrupted HTML'.format(hotel_id))
                return False
    logger.info('[hotel {}] PASSED: verified'.format(hotel_id))
    return True
Exemple #2
0
 def __init__(self, html):
     """Parse *html* and decode its ``application/ld+json`` script payload.

     Stores the parsed document on ``self._soup`` and the decoded JSON on
     ``self._json``.
     """
     soup = common.load_soup_string(html)
     self._soup = soup

     # JSON: text content of the first <script type="application/ld+json">
     ld_script = soup.find('script', type='application/ld+json')
     raw_json = str(ld_script.getText())
     self._json = json.loads(raw_json)
Exemple #3
0
    def __init__(self, html):
        """Parse a single review from *html* and cache its main sub-trees.

        Sets ``rid``, ``uid`` (``None`` when no ``UID_`` div is found) and
        the private soups ``_review_soup``, ``_bubble``, ``_inline`` and
        ``_rec_bar`` that are located by CSS class.
        """
        review = common.load_soup_string(html).find(
            'div', class_='reviewSelector')
        self._review_soup = review

        # Review id: the element id with its 7-character 'review_' prefix
        # sliced off.
        element_id = review['id'].strip()
        self.rid = element_id[7:]

        # User id (optional): the 32 characters that follow the 4-character
        # 'UID_' prefix of the matching div's id.
        member_info = review.find('div', class_='member_info')
        uid_div = member_info.find('div', id=re.compile('^UID_'))
        self.uid = None
        if uid_div is not None:
            self.uid = uid_div['id'].strip()[4:36]

        # Bubble: wrapper div inside the inner bubble.
        inner_bubble = review.find('div', class_='innerBubble')
        bubble = inner_bubble.find('div', class_='wrap')
        self._bubble = bubble

        # Rating
        self._inline = bubble.find('div', class_='reviewItemInline')

        # Recommend
        self._rec_bar = bubble.find('div', class_='rating-list')
Exemple #4
0
    def __init__(self, html):
        """Parse *html* and locate the three top-level profile sections.

        Each section attribute holds the first ``div`` with the matching CSS
        class, or whatever ``find`` returns when there is no match.
        """
        soup = common.load_soup_string(html)
        self.soup = soup

        # Overlay
        self.overlay = soup.find('div', class_='memberOverlay')

        # Left-hand profile panel
        self.left_profile = soup.find('div', class_='leftProfile')

        # Right-hand contributions panel
        self.right_con = soup.find('div', class_='rightContributions')
Exemple #5
0
 def user_is_valid(uid):
     """Return True when the stored page for *uid* has the member-center div.

     Returns False without logging when the user record or its HTML is
     missing. Otherwise logs a PASSED or FAILED line and returns whether a
     ``div`` with id ``MODULES_MEMBER_CENTER`` was found.
     """
     with taDB(common.TA_DB) as db:
         row = db.read_a_user(uid)

     # A missing record and a missing HTML column are treated the same way.
     html = None if row is None else row[0]
     if html is None:
         return False

     page = common.load_soup_string(html)
     verified = page.find('div', id='MODULES_MEMBER_CENTER') is not None
     if verified:
         logger.info('[user {}] PASSED: verified'.format(uid))
     else:
         logger.info('[user {}] FAILED: corrupted'.format(uid))
     return verified
Exemple #6
0
def save_reviews(web_data):
    """Extract every review div from a page and store them in the database.

    For each ``div`` whose id matches ``review_<digits>``, builds a
    ``(review_id, prettified_html, user_id)`` tuple. ``user_id`` is the first
    run of 32 uppercase-alphanumeric characters in the prettified HTML, or
    ``None`` when there is none. All tuples are written with one
    ``insert_many_reviews`` call while holding ``lock``.

    Args:
        web_data: HTML text passed to ``common.load_soup_string``.

    Returns:
        List of the extracted review ids, in document order.
    """
    web_soup = common.load_soup_string(web_data)
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    review_soups = web_soup.find_all('div', id=re.compile(r'review_\d+'))
    # Compiled once instead of being looked up on every iteration.
    uid_pattern = re.compile('[A-Z0-9]{32}')

    records = []
    for review in review_soups:
        # Drop the 7-character 'review_' prefix.
        # NOTE(review): the id pattern is unanchored, so this assumes the id
        # actually starts with 'review_' -- confirm against real pages.
        any_rid = review['id'][7:]
        any_html = review.prettify()
        uid_match = uid_pattern.search(any_html)
        any_uid = uid_match.group(0) if uid_match is not None else None
        records.append((any_rid, any_html, any_uid))

    with lock:
        with taDB(common.TA_DB) as db:
            db.insert_many_reviews(records)
    return [record[0] for record in records]