Ejemplo n.º 1
0
    def from_jset(self, jset_id):
        """Fill ``self._doc`` from the stored jset record ``jset_id``.

        Looks the record up in ``self.db.jsets``, writes the English
        sub-document under ``"en"`` together with the top-level fields,
        flags the instance as loaded from jset, and finally merges the
        result of ``telephone_and_website`` into the document when that
        result is truthy.

        Raises:
            JsetNotFound: if no record matches ``jset_id``.
        """
        record = self.db.jsets.find_one({"jset_id": jset_id})
        if not record:
            raise JsetNotFound(jset_id)  # noqa

        overview = strip_tags(record["overview"])
        what_to_know = strip_tags(record["what_to_know"])
        what_we_love = strip_tags(record["what_we_love"])
        traffic = strip_tags(record["travel_tips"])
        if isinstance(traffic, list):
            traffic = "\n".join(traffic)

        extra = JsetAdditional(record["origin_body"])

        # Walk geocode -> geometry -> location, tolerating missing levels.
        geocode = record.get("geocode", {})
        geometry = geocode.get("geometry", {})
        coords = geometry.get("location", {})

        # Keep only the text before the first "(" as the display name.
        short_name = record["name"].split("(")[0].strip()
        english = {
            "name": short_name,
            "address": self.get_address(record),
            "introduction": "\n".join(overview),
            "recommend_reason": "\n".join(what_we_love),
            "info": traffic,
            "tips": "\n".join(what_to_know),
        }
        self._doc["en"] = english

        self._doc["city_name"] = self.city or extra.city()
        self._doc["recommend_reason"] = what_we_love
        # All four name fields carry the same English name.
        for name_key in ("jset_name", "jset_name_en", "name", "name_en"):
            self._doc[name_key] = english["name"]
        self._doc["address"] = english["address"]
        # Prefer the geocoded coordinates; fall back to the record's own.
        self._doc["latitude"] = coords.get("lat") or record["latitude"]
        self._doc["longitude"] = coords.get("lng") or record["longitude"]
        self._doc["rating"] = extra.rating()
        self._doc["comments_url"] = self.__comments_url or extra.url()
        self._doc["comments_from"] = "TripAdvisor"
        self._doc["gallery"] = extra.images()
        # The first gallery entry doubles as the cover image.
        self._doc["cover_image_url"] = self._doc["gallery"][0]["image_url"]
        if not self._capture_id:
            self._doc.update(self.capture_url(jset_id=jset_id))
        self._doc["price"] = extra.price() or 0
        self._doc["introduction"] = overview
        self._doc["tips"] = what_to_know
        self._doc["traffic_info"] = traffic
        self._doc["jset_id"] = jset_id

        self.__loaded_from_jset = True

        contact = self.telephone_and_website(self._doc["capture_id"],
                                             self._doc["address"])
        if not contact:
            logger.debug("Bad telephone and website, %s", self._doc["address"])
            return
        logger.debug(contact)
        self._doc.update(contact)
Ejemplo n.º 2
0
    def from_jset(self, jset_id):
        """Populate ``self._doc`` from the jset record identified by ``jset_id``.

        Reads the record from ``self.db.jsets``, stores the English
        sub-document under ``'en'`` plus the top-level fields, marks the
        instance as loaded from jset, and merges the result of
        ``telephone_and_website`` into the document when it is truthy.

        Raises:
            JsetNotFound: if ``self.db.jsets`` has no record for ``jset_id``.
        """
        jset = self.db.jsets.find_one({'jset_id': jset_id})
        if not jset:
            raise JsetNotFound(jset_id)  # noqa

        intro = strip_tags(jset['overview'])
        tips = strip_tags(jset['what_to_know'])
        recommend = strip_tags(jset['what_we_love'])
        travel_tips = strip_tags(jset['travel_tips'])
        if isinstance(travel_tips, list):
            travel_tips = '\n'.join(travel_tips)

        add = JsetAdditional(jset['origin_body'])

        # Walk geocode -> geometry -> location, tolerating missing levels.
        location = jset.get('geocode', {}).get('geometry', {}) \
            .get('location', {})

        self._doc['en'] = {
            # Keep only the text before the first '(' as the display name.
            'name': jset['name'].split('(')[0].strip(),
            'address': self.get_address(jset),
            'introduction': '\n'.join(intro),
            'recommend_reason': '\n'.join(recommend),
            'info': travel_tips,
            'tips': '\n'.join(tips),
        }

        self._doc['city_name'] = self.city or add.city()
        self._doc['recommend_reason'] = recommend
        self._doc['jset_name'] = self._doc['en']['name']
        self._doc['jset_name_en'] = self._doc['en']['name']
        self._doc['name'] = self._doc['en']['name']
        self._doc['name_en'] = self._doc['en']['name']
        self._doc['address'] = self._doc['en']['address']
        # Prefer the geocoded coordinates; fall back to the record's own.
        self._doc['latitude'] = location.get('lat') or jset['latitude']
        self._doc['longitude'] = location.get('lng') or jset['longitude']
        self._doc['rating'] = add.rating()
        self._doc['comments_url'] = self.__comments_url or add.url()
        self._doc['comments_from'] = 'TripAdvisor'
        self._doc['gallery'] = add.images()
        # The first gallery entry doubles as the cover image.
        self._doc['cover_image_url'] = self._doc['gallery'][0]['image_url']
        if not self._capture_id:
            self._doc.update(self.capture_url(jset_id=jset_id))
        self._doc['price'] = add.price() or 0
        self._doc['introduction'] = intro
        self._doc['tips'] = tips
        self._doc['traffic_info'] = travel_tips
        self._doc['jset_id'] = jset_id

        self.__loaded_from_jset = True

        u = self.telephone_and_website(self._doc['capture_id'],
                                       self._doc['address'])

        if u:
            logger.debug(u)
            # Merge only a truthy result: updating with a falsy value such
            # as None would fail (``dict.update(None)`` raises TypeError).
            self._doc.update(u)
        else:
            logger.debug('Bad telephone and website, %s', self._doc['address'])
Ejemplo n.º 3
0
async def jset_payload(jset):
    """Build the payload dict for one jset record.

    The English texts are collected under ``'en'``; the top-level text
    fields hold the output of ``translate`` for the same content.

    Args:
        jset: mapping holding the scraped record. The keys read here are
            ``travel_tips``, ``url``, ``origin_body``, ``name``,
            ``overview``, ``what_we_love``, ``what_to_know`` and
            ``hotel_id``, plus optionally ``geometry``,
            ``formatted_address`` and ``how_to_get_there``. ``latitude``
            and ``longitude`` are read only when no geometry location is
            present.

    Returns:
        dict with the translated fields, coordinates, rating, gallery,
        price and the nested ``'en'`` dict.
    """
    travel_tips = jset['travel_tips']
    if isinstance(travel_tips, list):
        travel_tips = strip_tags('\n'.join(travel_tips))
        travel_tips_zh = await translate(travel_tips)
        log.warning('list')
    elif isinstance(travel_tips, str) and travel_tips != '':
        travel_tips = strip_tags(travel_tips)
        travel_tips_zh = await translate(travel_tips)
    else:
        # Neither a list nor a non-empty string: fall back to empty text.
        log.warning(
            f'Unkown type<{type(travel_tips)}> of travel_tips: {jset["url"]}'
        )  # noqa
        travel_tips = ''
        travel_tips_zh = ''

    location = jset.get('geometry', {}).get('location')
    if not location:
        location = {'lat': jset['latitude'], 'lng': jset['longitude']}
    addition = JsetAdditional(jset['origin_body'])

    # Build the fallback address lazily. Passing it as the ``dict.get``
    # default would evaluate the join even when ``formatted_address``
    # exists, and would raise TypeError if ``how_to_get_there`` is missing.
    if 'formatted_address' in jset:
        address = jset['formatted_address']
    else:
        address = ','.join(jset.get('how_to_get_there') or [])

    en = {
        'name': jset['name'],
        'address': address,
        'introduction': strip_tags('\n'.join(jset['overview'])),
        'recommend_reason': strip_tags('\n'.join(jset['what_we_love'])),
        'info': travel_tips,
        'tips': strip_tags('\n'.join(jset['what_to_know'])),
    }
    item = {
        'en': en,
        'city_name': addition.city(),
        'recommend_reason': await translate(en['recommend_reason']),
        'name': await translate(jset['name']),
        'name_en': jset['name'],
        'hotel_id': jset['hotel_id'],
        'address': await translate(en['address']),
        'latitude': location['lat'],
        'longitude': location['lng'],
        'rating': addition.rating(),
        'comments_url': addition.url(),
        'comments_from': 'TripAdvisor',
        'gallery': addition.images(),
        'capture_url': jset['url'],
        'price': addition.price() or 0,
        'introduction': await translate(en['introduction']),
        'tips': await translate(en['tips']),
        'traffic_info': travel_tips_zh,
    }
    # NOTE(review): logging the price at error level looks like leftover
    # debug output -- confirm before lowering the level.
    log.error(item['price'])
    return item
Ejemplo n.º 4
0
    def from_rxml(self, hotel_id):
        """Fill ``self._doc`` from the roomsxml record ``hotel_id``.

        Always records ``hotel_id`` and flags the instance as loaded from
        roomsxml. When the document was already loaded from jset nothing
        else is touched; otherwise the location, name and introduction
        fields are copied from the record and the result of
        ``telephone_and_website`` is merged in when it is truthy.

        Raises:
            RoomsxmlNotFound: if no record matches ``hotel_id``.
        """
        record = self.db.rxmls.find_one({'hotel_id': hotel_id})
        if not record:
            raise RoomsxmlNotFound(hotel_id)  # noqa

        self._doc['hotel_id'] = hotel_id
        self.__loaded_from_rxml = True
        if self.__loaded_from_jset:
            # Jset data takes precedence; keep what is already there.
            return

        self._doc['city_name'] = self.city or record['address']['city']
        address = self.get_address(record)
        self._doc['address'] = address
        for field in ('latitude', 'longitude', 'rank'):
            self._doc[field] = record[field]
        hotel_name = record['name']
        self._doc['roomsxml_name'] = hotel_name
        self._doc['roomsxml_address'] = address
        self._doc['name'] = hotel_name
        self._doc['name_en'] = hotel_name
        self._doc['introduction'] = strip_tags(record['description']['text'])
        if not self._capture_id:
            captured = self.capture_url(hotel_id=hotel_id,
                                        jset_id=self._jset_id)
            self._doc.update(captured)

        # Read back from the document: capture_url may have changed it.
        capture_id = self._doc['capture_id']
        query = ', '.join([self._doc['name'], self._doc['address']])
        contact = self.telephone_and_website(capture_id, query)
        if not contact:
            logger.debug('Bad telephone and website, %s', self._doc['address'])
            return
        logger.debug(contact)
        self._doc.update(contact)
Ejemplo n.º 5
0
    def from_rxml(self, hotel_id):
        """Load roomsxml data for ``hotel_id`` into ``self._doc``.

        The hotel id is always stored and the roomsxml-loaded flag is
        always set. If jset data was loaded earlier the method stops
        there; otherwise it copies address, coordinates, rank, name and
        introduction from the record and merges the truthy result of
        ``telephone_and_website`` into the document.

        Raises:
            RoomsxmlNotFound: if ``self.db.rxmls`` has no such hotel.
        """
        found = self.db.rxmls.find_one({"hotel_id": hotel_id})
        if not found:
            raise RoomsxmlNotFound(hotel_id)  # noqa

        self._doc["hotel_id"] = hotel_id
        self.__loaded_from_rxml = True
        if self.__loaded_from_jset:
            # Fields already filled from jset are left untouched.
            return

        self._doc["city_name"] = self.city or found["address"]["city"]
        resolved_address = self.get_address(found)
        self._doc["address"] = resolved_address
        self._doc["latitude"] = found["latitude"]
        self._doc["longitude"] = found["longitude"]
        self._doc["rank"] = found["rank"]
        self._doc["roomsxml_name"] = found["name"]
        self._doc["roomsxml_address"] = resolved_address
        # The same source name fills both name fields.
        for name_key in ("name", "name_en"):
            self._doc[name_key] = found["name"]
        self._doc["introduction"] = strip_tags(found["description"]["text"])
        if not self._capture_id:
            self._doc.update(
                self.capture_url(hotel_id=hotel_id, jset_id=self._jset_id))

        # Read back from the document: capture_url may have changed it.
        capture_id = self._doc["capture_id"]
        search_text = ", ".join([self._doc["name"], self._doc["address"]])
        details = self.telephone_and_website(capture_id, search_text)
        if details:
            logger.debug(details)
            self._doc.update(details)
            return
        logger.debug("Bad telephone and website, %s", self._doc["address"])
Ejemplo n.º 6
0
async def jet_hotel(jets, geocode):
    """Build the hotel payload dict from a jets record and its geocode.

    English texts go into the nested ``'en'`` dict; the top-level text
    fields hold the output of ``translate`` for the same content.

    Args:
        jets: mapping with the scraped record. The keys read here are
            ``travel_tips``, ``name``, ``overview``, ``what_we_love``,
            ``what_to_know``, ``hotel_id`` and ``url``.
        geocode: mapping with ``formatted_address`` and ``loc``; ``loc``
            is indexed as ``[longitude, latitude]``.

    Returns:
        dict with the translated fields, coordinates, placeholder values
        for the fields not available here, and the nested ``'en'`` dict.
    """
    travel_tips = jets['travel_tips']
    if isinstance(travel_tips, list):
        travel_tips = strip_tags('\n'.join(travel_tips))
        travel_tips_zh = await translate(travel_tips)
    else:
        # Anything that is not a list is treated as "no travel tips".
        travel_tips = ''
        travel_tips_zh = ''
    # ``info`` carries the travel tips and ``tips`` the "what to know"
    # text, matching the mapping used by from_jset and jset_payload
    # (the two keys used to be swapped here).
    en = {
        'name': jets['name'],
        'address': geocode['formatted_address'],
        'introduction': strip_tags('\n'.join(jets['overview'])),
        'merchant': '',
        'recommend_reason': strip_tags('\n'.join(jets['what_we_love'])),
        'info': travel_tips,
        'tips': strip_tags('\n'.join(jets['what_to_know'])),
    }

    hotel = {
        'name': await translate(jets['name']),
        'name_en': jets['name'],
        'hotel_id': jets['hotel_id'],
        'address': await translate(geocode['formatted_address']),
        'latitude': geocode['loc'][1],
        'longitude': geocode['loc'][0],
        'ranking': '',
        'rating': '',
        'telephone': '',
        'website': '',
        'price': 0,
        'introduction': await translate(en['introduction']),
        'tips': await translate(en['tips']),
        'traffic_info': travel_tips_zh,
        'recommend_reason': await translate(en['recommend_reason']),
        'cover_images': '',
        'capture_url': jets['url'],
        'en': en,
    }
    # The key used to be misspelled; keep the old spelling as an alias so
    # any consumer still reading it keeps working.
    hotel['indtroduction'] = hotel['introduction']
    return hotel
Ejemplo n.º 7
0
def payload(dl):
    """Build the deal payload dict from a scraped deal record ``dl``.

    Text fields are translated with ``trans.translate(..., dest='zh-CN')``
    and the untranslated originals are kept under ``'en'`` and the
    ``*_ref`` keys.

    Args:
        dl: mapping holding the scraped deal. Besides the optional
            ``company``, ``title``, ``prices``, ``highlights``,
            ``whats_included``, ``term`` and ``overview`` keys, it must
            provide ``origin``, ``where``, ``edition_disclaimer``,
            ``src_image``, ``source``, ``start_time``, ``end_time``,
            ``url`` and ``summary_keywords``. ``price`` is required when
            there is no promo price.

    Returns:
        The payload dict, or ``None`` when the record has no company
        address.
    """
    addr = ','.join(dl.get('company', {}).get('address', []))
    if not addr:
        return None

    # Raw strings: '\.' in a normal string literal is an invalid escape.
    digit = re.compile(r'[0-9.]+')
    currency = re.compile(r'[^A-Z]([A-Z]{3})[^A-Z]')

    name_x = dl.get('title') \
        or dl['origin'].get('hdl') \
        or dl['origin'].get('fhd')
    name_en = name_x
    name = trans.translate(name_en, dest='zh-CN').text
    city = dl['where']

    # Prefer the promo price; fall back to the regular one.
    price = dl.get('prices', {}).get('promo_price')
    if not price:
        price = dl['price']
    try:
        price = float(digit.findall(price)[0])
    except (TypeError, IndexError, ValueError):
        # Non-string price, no number in it, or a malformed number:
        # report it and fall back to 0, as before.
        print('Bad price: {}'.format(dl.get('price')))
        print('Title: {}'.format(name_x))
        print('Url is: {}'.format(dl['url']))
        price = 0
    # NOTE(review): raises AttributeError when the disclaimer contains no
    # three-letter uppercase code -- confirm whether a fallback is wanted.
    price_type = currency.search(dl['edition_disclaimer']).group(1)
    cover_images = dl['src_image']
    _merchant = dl['source']

    _highlights = dl.get('highlights', {})
    highlights_zh = []
    highlights_en = []
    the_deal = strip_tags(_highlights.get('the_deal', ''))
    if the_deal:
        highlights_en.append({'title': 'The Deal', 'description': the_deal})
        highlights_zh.append({
            'title': 'The Deal',
            'description': trans.translate(the_deal, dest='zh-CN').text,
        })
    for why in map(strip_tags, _highlights.get('why_we_love_it', [])):
        why_t = trans.translate(why, dest='zh-CN').text
        if not why_t:
            # Skip entries whose translation came back empty.
            continue
        highlights_en.append({'title': 'Why We Love It', 'description': why})
        highlights_zh.append({'title': 'Why We Love It', 'description': why_t})

    # Newlines are swapped for '|' around the translate call and restored
    # afterwards.
    _info = '\n'.join(map(strip_tags, dl.get('whats_included', [])))
    info = trans.translate(_info.replace('\n', '|'), dest='zh-CN') \
        .text.replace('|', '\n')
    _tips = strip_tags(dl.get('term', '').replace('\n', '|'))
    if _tips:
        tips = trans.translate(_tips, dest='zh-CN').text.replace('|', '\n')
    else:
        tips = ''

    phone = dl.get('company', {}).get('telephone')
    available_start = dl['start_time']
    available_end = dl['end_time']
    capture_url = url_cleaner(dl['url'])
    short_introduction = dl['summary_keywords']
    introduction = dl.get('overview')
    en = {
        'merchant': _merchant,
        'highlights': highlights_en,
        'info': _info,
        'tips': _tips,
        'address': addr,
    }

    d = {
        'name': name,
        'name_en': name_en,
        'city_ref': city,
        'info_ref': _info,
        'tips_ref': _tips,
        'tags_ref': '',
        'price': float(price),
        'price_type': price_type,
        'merchant': _merchant,
        'highlights': highlights_zh,
        'cover_images': [{
            'image_url': cover_images
        }],
        'info': info,
        'tips': tips,
        'address': addr,
        'available_start': available_start,
        'available_end': available_end,
        'capture_url': capture_url,
        'en': en,
        'introduction': strip_tags(introduction),
        'short_introduction': strip_tags(short_introduction)
    }
    # The phone number is only included when the record provides one.
    if phone:
        d['phone'] = phone

    return d