Beispiel #1
0
 def _fix_hotels(self):
     if self.country_code == "cn":
         # Fix chinese coordinates.
         # https://en.wikipedia.org/wiki/Restrictions_on_geographic_data_in_China
         for hotel in self.hotels:
             hotel_data = hotel["hotel_data"]
             location = hotel_data["location"]
             try:
                 location["latitude"], location["longitude"] = gcj2wgs_exact(
                     float(location["latitude"]), float(location["longitude"])
                 )
             except ValueError:
                 logging.exception(f"Converting error {location}")
Beispiel #2
0
 def _fix_hotels(self):
     if self.country_code == "cn":
         # Fix chinese coordinates.
         # https://en.wikipedia.org/wiki/Restrictions_on_geographic_data_in_China
         for hotel in self.hotels:
             hotel_data = hotel["hotel_data"]
             location = hotel_data["location"]
             try:
                 location["latitude"], location[
                     "longitude"] = gcj2wgs_exact(
                         float(location["latitude"]),
                         float(location["longitude"]))
             except ValueError:
                 logging.exception(f"Converting error {location}")
Beispiel #3
0
 def test_z_speed(self):
     """Time each eviltransform function and print the cost per call.

     Every function is invoked ``iterations`` times through ``timeit``
     using the first entry of ``TESTS``; the result is printed in
     nanoseconds per call. Nothing is asserted.
     """
     iterations = 100000

     def run_wgs2gcj():
         return eviltransform.wgs2gcj(TESTS[0][0], TESTS[0][1])

     def run_gcj2wgs():
         return eviltransform.gcj2wgs(TESTS[0][0], TESTS[0][1])

     def run_gcj2wgs_exact():
         return eviltransform.gcj2wgs_exact(TESTS[0][0], TESTS[0][1])

     def run_distance():
         return eviltransform.distance(*TESTS[0])

     benchmarks = (
         ('wgs2gcj', run_wgs2gcj),
         ('gcj2wgs', run_gcj2wgs),
         ('gcj2wgs_exact', run_gcj2wgs_exact),
         ('distance', run_distance),
     )

     print('\n' + '=' * 30)
     for label, bench in benchmarks:
         elapsed = timeit.timeit(bench, number=iterations)
         # timeit reports seconds for all runs; scale to ns per single call.
         print('%s\t%.2f ns/op' % (label, elapsed * 1e9 / iterations))
Beispiel #4
0
def transform(src, dst):
    """Copy *src* to *dst*, converting GCJ coordinates to WGS on the way.

    Each line is searched for a ``lat="<float>" lon="<float>"`` attribute
    pair. When found, the pair is passed through ``gcj2wgs_exact`` and
    replaced by the result rounded to 6 decimal places. Lines without such
    a pair are copied unchanged.

    Args:
        src: Path of the text file to read.
        dst: Path of the text file to write (overwritten if it exists).

    Raises:
        OSError: If either file cannot be opened. The original error is
            propagated as-is instead of being masked by cleanup code.
    """
    # Raw string: "\d" and "\." are regex escapes, not string escapes.
    latlon = re.compile(r'lat="(\d+\.\d+)" lon="(\d+\.\d+)"')

    # The context manager closes whichever files were actually opened, even
    # when opening the second one (or the conversion) fails.
    with open(src, 'r') as rfile, open(dst, 'w', newline='') as wfile:
        for line in rfile:
            m = latlon.search(line)
            if m is not None:
                wgslat, wgslon = gcj2wgs_exact(
                    float(m.group(1)), float(m.group(2)))
                # Splice by match position rather than str.replace(): the
                # latitude text may also occur inside the longitude (e.g.
                # "31.123456" within "131.123456") or elsewhere on the line.
                line = ''.join((
                    line[:m.start(1)],
                    str(round(wgslat, 6)),
                    line[m.end(1):m.start(2)],
                    str(round(wgslon, 6)),
                    line[m.end(2):],
                ))
            wfile.write(line)
Beispiel #5
0
def translate(source, output):
    '''
    Reads *.pkl files and produces a single list of hotels as tab separated values.

    Steps:
      1. Load and concatenate the lists stored in every ``*.pkl`` file found
         directly in ``source`` (processed in sorted filename order).
      2. Convert the coordinates of hotels whose ``countrycode`` is ``'cn'``
         with ``eviltransform.gcj2wgs_exact``.
      3. Compute, per city and currency, the median of ``minrate``.
      4. Write one line per hotel to ``output`` with the columns listed in
         ``HOTEL_FIELDS``; the ``.rate`` column is the price bucket
         (0 = no usable price, otherwise 1..len(rates)+1).

    source -- directory containing the pickled hotel lists.
    output -- path of the tab separated file to write.
    '''
    files = [os.path.join(source, filename)
             for filename in os.listdir(source) if filename.endswith('.pkl')]

    data = []
    for filename in sorted(files):
        logging.info('Processing {0}'.format(filename))
        with open(filename, 'rb') as fd:
            # SECURITY NOTE: pickle.load executes arbitrary code from the file;
            # only run this on .pkl files from a trusted source.
            data += pickle.load(fd)

    # Fix chinese coordinates
    for hotel in data:
        if hotel['countrycode'] == 'cn' and 'location' in hotel:
            try:
                hotel['location']['latitude'], hotel['location']['longitude'] = eviltransform.gcj2wgs_exact(
                    float(hotel['location']['latitude']), float(hotel['location']['longitude']))
            except ValueError:
                # We don't care if there were errors converting coordinates to float
                pass

    # Dict of dicts city_id -> { currency -> [prices] }
    cities = defaultdict(lambda: defaultdict(list))

    def valid(hotel):
        # A hotel takes part in price statistics only if it has a city,
        # a currency and a non-None minimum rate.
        return 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None

    # Collect prices
    for hotel in data:
        if valid(hotel):
            cities[hotel['city_id']][hotel['currencycode']].append(float(hotel['minrate']))

    # Replaces list of prices by a median price.
    for city in cities:
        for cur in cities[city]:
            # Floor division keeps the index an int; plain "/" yields a float
            # under true division, which cannot index a list. For an even
            # number of prices this picks the upper of the two middle values.
            cities[city][cur] = sorted(cities[city][cur])[len(cities[city][cur]) // 2]

    # Price rate ranges, relative to the median price for a city
    rates = (0.7, 1.3)

    def get_hotel_field(hotel, field, rate):
        # Fields starting with '.' are computed; anything else is read
        # directly from the hotel dict.
        if field == '.lat':
            return hotel['location']['latitude']
        elif field == '.lon':
            return hotel['location']['longitude']
        elif field == '.rate':
            return rate
        elif field == '.trans':
            # Translations are packed into a single column: lang1|name1|address1|lang2|name2|address2|...
            if 'translations' in hotel:
                tr_list = []
                for tr_lang, tr_values in hotel['translations'].items():
                    tr_list.append(tr_lang)
                    tr_list.extend([tr_values[e] for e in ('name', 'address')])
                # '|' is the separator, so it must not appear inside a value.
                return '|'.join([s.replace('|', ';') for s in tr_list])
            else:
                return ''
        elif field in hotel:
            return hotel[field]
        elif field == 'ranking':
            # This field is not used yet, and booking.com sometimes blocks it.
            return ''
        logging.error('Unknown hotel field: {0}, URL: {1}'.format(field, hotel['url']))
        return ''

    with open(output, 'w') as fd:
        for hotel in data:
            rate = 0
            if valid(hotel):
                avg = cities[hotel['city_id']][hotel['currencycode']]
                price = float(hotel['minrate'])
                rate = 1
                # Find a range that contains the price
                while rate <= len(rates) and price > avg * rates[rate - 1]:
                    rate += 1
            l = [get_hotel_field(hotel, e, rate) for e in HOTEL_FIELDS]
            # Tabs and line breaks inside a value would break the TSV layout.
            # NOTE(review): `unicode` is a Python 2 builtin; unless the file
            # defines it elsewhere, this line only runs under Python 2.
            print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ').replace('\n', ' ').replace('\r', '') for f in l]), file=fd)
Beispiel #6
0
 def test_gcj2wgs_exact(self):
     """Check gcj2wgs_exact against the reference WGS point of every TESTS row.

     The converted point must be less than 0.5 away from the expected one,
     as measured by ``eviltransform.distance``.
     """
     for wgs_lat, wgs_lng, gcj_lat, gcj_lng in TESTS:
         converted = eviltransform.gcj2wgs_exact(gcj_lat, gcj_lng)
         error = eviltransform.distance(
             converted[0], converted[1], wgs_lat, wgs_lng)
         self.assertLess(error, .5)
Beispiel #7
0
 def test_gcj2wgs_exact(self):
     """Verify that gcj2wgs_exact recovers the expected WGS coordinates.

     For each row of ``TESTS`` the GCJ pair is converted and the result is
     required to lie within 0.5 of the expected WGS pair, per
     ``eviltransform.distance``.
     """
     tolerance = .5
     for expected_lat, expected_lng, gcj_lat, gcj_lng in TESTS:
         result = eviltransform.gcj2wgs_exact(gcj_lat, gcj_lng)
         got_lat = result[0]
         got_lng = result[1]
         offset = eviltransform.distance(
             got_lat, got_lng, expected_lat, expected_lng)
         self.assertLess(offset, tolerance)