def _fix_hotels(self):
    """Rewrite hotel coordinates in place when ``self.country_code`` is "cn".

    Every entry of ``self.hotels`` has its
    ``["hotel_data"]["location"]`` latitude/longitude passed through
    ``gcj2wgs_exact`` and overwritten with the result. A ``ValueError``
    raised while converting one hotel is logged and that hotel keeps its
    original values; processing continues with the next one.
    """
    if self.country_code != "cn":
        return

    # Fix chinese coordinates.
    # https://en.wikipedia.org/wiki/Restrictions_on_geographic_data_in_China
    for entry in self.hotels:
        location = entry["hotel_data"]["location"]
        try:
            converted_lat, converted_lon = gcj2wgs_exact(
                float(location["latitude"]), float(location["longitude"])
            )
        except ValueError:
            logging.exception(f"Converting error {location}")
        else:
            # Only store the pair once both values were produced.
            location["latitude"] = converted_lat
            location["longitude"] = converted_lon
def _fix_hotels(self):
    """Convert the stored coordinates of Chinese hotels in place.

    Does nothing unless ``self.country_code == "cn"``. Otherwise each
    hotel's ``hotel_data["location"]`` gets its ``latitude`` and
    ``longitude`` replaced by the output of ``gcj2wgs_exact``. When the
    conversion raises ``ValueError`` the error is logged with the
    offending location and the hotel is left untouched.
    """
    is_china = self.country_code == "cn"
    if not is_china:
        return

    # Fix chinese coordinates.
    # https://en.wikipedia.org/wiki/Restrictions_on_geographic_data_in_China
    for record in self.hotels:
        location = record["hotel_data"]["location"]
        try:
            fixed = gcj2wgs_exact(
                float(location["latitude"]),
                float(location["longitude"]),
            )
            # Unpack inside the try: a malformed result also raises ValueError.
            new_lat, new_lon = fixed
        except ValueError:
            logging.exception(f"Converting error {location}")
            continue
        location["latitude"], location["longitude"] = new_lat, new_lon
def test_z_speed(self):
    """Print a rough per-call timing for each eviltransform function.

    Each function is invoked 100000 times through ``timeit`` using the
    first entry of ``TESTS`` as input; the average time per call is
    printed in nanoseconds. Nothing is asserted.
    """
    iterations = 100000
    benchmarks = [
        ('wgs2gcj',
         lambda: eviltransform.wgs2gcj(TESTS[0][0], TESTS[0][1])),
        ('gcj2wgs',
         lambda: eviltransform.gcj2wgs(TESTS[0][0], TESTS[0][1])),
        ('gcj2wgs_exact',
         lambda: eviltransform.gcj2wgs_exact(TESTS[0][0], TESTS[0][1])),
        ('distance',
         lambda: eviltransform.distance(*TESTS[0])),
    ]

    # Visual separator so the timings stand out in the test runner output.
    print('\n' + '=' * 30)
    for label, call in benchmarks:
        elapsed = timeit.timeit(call, number=iterations)
        ns_per_op = elapsed * 1e9 / iterations
        print('%s\t%.2f ns/op' % (label, ns_per_op))
def transform(src, dst):
    """Copy *src* to *dst*, converting ``lat="..." lon="..."`` pairs.

    The file is processed line by line. Every ``lat="D.D" lon="D.D"``
    attribute pair (unsigned decimals only) is passed through
    ``gcj2wgs_exact`` and rewritten with both values rounded to six
    decimal places. Lines without such a pair are copied unchanged.

    Args:
        src: Path of the text file to read.
        dst: Path of the text file to write (created or truncated).

    Raises:
        OSError: If either file cannot be opened, read or written. The
            source is opened first, so a missing *src* leaves *dst*
            untouched.
    """
    # Raw string: "\d" and "\." are regex escapes, not string escapes.
    latlon = re.compile(r'lat="(\d+\.\d+)" lon="(\d+\.\d+)"')

    def _to_wgs(match):
        # Rebuild the matched span itself. A plain str.replace() on the
        # coordinate text would also rewrite unrelated occurrences of the
        # same digits (e.g. "30.5" inside lon="130.5").
        wgslat, wgslon = gcj2wgs_exact(
            float(match.group(1)), float(match.group(2)))
        return 'lat="{0}" lon="{1}"'.format(
            str(round(wgslat, 6)), str(round(wgslon, 6)))

    # Context managers close whichever files were actually opened, so a
    # failing open() surfaces as the real OSError instead of being masked.
    with open(src, 'r') as rfile, open(dst, 'w', newline='') as wfile:
        for line in rfile:
            wfile.write(latlon.sub(_to_wgs, line))
def translate(source, output):
    '''
    Reads *.pkl files and produces a single list of hotels as tab separated values.

    Every ``*.pkl`` file directly inside ``source`` is unpickled (in sorted
    filename order) and the resulting lists are concatenated. Hotels whose
    ``countrycode`` is ``'cn'`` have their location passed through
    ``eviltransform.gcj2wgs_exact``. A median ``minrate`` is computed per
    (city_id, currencycode) and each hotel gets a price rate:

      0 - the hotel has no usable city/currency/price,
      1 - price <= 0.7 * median,
      2 - price <= 1.3 * median,
      3 - price above that.

    One line per hotel is written to ``output`` with the columns listed in
    ``HOTEL_FIELDS``; tabs and newlines inside values are replaced by
    spaces and carriage returns are removed.

    NOTE(review): the output line uses the Python 2 builtin ``unicode``, so
    this function appears to target Python 2 with ``print_function``
    enabled - confirm before running it under Python 3.
    '''
    files = [os.path.join(source, filename)
             for filename in os.listdir(source)
             if filename.endswith('.pkl')]

    data = []
    for filename in sorted(files):
        logging.info('Processing {0}'.format(filename))
        with open(filename, 'rb') as fd:
            # SECURITY: pickle.load can execute arbitrary code; only feed
            # this function files produced by a trusted source.
            data += pickle.load(fd)

    # Fix chinese coordinates
    for hotel in data:
        if hotel['countrycode'] == 'cn' and 'location' in hotel:
            try:
                hotel['location']['latitude'], hotel['location']['longitude'] = eviltransform.gcj2wgs_exact(
                    float(hotel['location']['latitude']), float(hotel['location']['longitude']))
            except ValueError:
                # We don't care if there were errors converting coordinates to float
                pass

    # Dict of dicts city_id -> { currency -> [prices] }
    cities = defaultdict(lambda: defaultdict(list))

    def valid(hotel):
        # A hotel takes part in price rating only if it has all three keys
        # and a non-None price.
        return 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None

    # Collect prices
    for hotel in data:
        if valid(hotel):
            cities[hotel['city_id']][hotel['currencycode']].append(float(hotel['minrate']))

    # Replaces list of prices by a median price.
    for city in cities:
        for cur in cities[city]:
            prices = sorted(cities[city][cur])
            # Floor division keeps the index an int even under true division
            # (Python 3 or `from __future__ import division`). For an even
            # number of prices this picks the upper of the two middle values.
            cities[city][cur] = prices[len(prices) // 2]

    # Price rate ranges, relative to the median price for a city
    rates = (0.7, 1.3)

    def get_hotel_field(hotel, field, rate):
        # Resolves one output column. Names starting with '.' are computed;
        # anything else is read from the hotel dict.
        if field == '.lat':
            return hotel['location']['latitude']
        elif field == '.lon':
            return hotel['location']['longitude']
        elif field == '.rate':
            return rate
        elif field == '.trans':
            # Translations are packed into a single column: lang1|name1|address1|lang2|name2|address2|...
            if 'translations' in hotel:
                tr_list = []
                for tr_lang, tr_values in hotel['translations'].items():
                    tr_list.append(tr_lang)
                    tr_list.extend([tr_values[e] for e in ('name', 'address')])
                # '|' is the separator, so it must not appear inside a value.
                return '|'.join([s.replace('|', ';') for s in tr_list])
            else:
                return ''
        elif field in hotel:
            return hotel[field]
        elif field == 'ranking':
            # This field is not used yet, and booking.com sometimes blocks it.
            return ''
        logging.error('Unknown hotel field: {0}, URL: {1}'.format(field, hotel['url']))
        return ''

    with open(output, 'w') as fd:
        for hotel in data:
            rate = 0
            if valid(hotel):
                avg = cities[hotel['city_id']][hotel['currencycode']]
                price = float(hotel['minrate'])
                rate = 1
                # Find a range that contains the price
                while rate <= len(rates) and price > avg * rates[rate - 1]:
                    rate += 1
            values = [get_hotel_field(hotel, e, rate) for e in HOTEL_FIELDS]
            print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ').replace('\n', ' ').replace('\r', '') for f in values]), file=fd)
def test_gcj2wgs_exact(self):
    """Check that ``gcj2wgs_exact`` recovers the reference WGS point.

    For every row of ``TESTS`` the GCJ pair is converted and the result
    must lie less than 0.5 (in whatever unit ``eviltransform.distance``
    returns - presumably metres, confirm) from the row's WGS pair.
    """
    max_error = .5
    for wgs_lat, wgs_lng, gcj_lat, gcj_lng in TESTS:
        recovered = eviltransform.gcj2wgs_exact(gcj_lat, gcj_lng)
        error = eviltransform.distance(
            recovered[0], recovered[1], wgs_lat, wgs_lng)
        self.assertLess(error, max_error)
def test_gcj2wgs_exact(self):
    """Verify the exact GCJ-to-WGS conversion against ``TESTS``.

    Each row of ``TESTS`` holds a WGS pair followed by a GCJ pair. The
    GCJ pair is converted with ``eviltransform.gcj2wgs_exact`` and
    ``eviltransform.distance`` between the result and the WGS pair must
    be below 0.5.
    """
    for row in TESTS:
        expected_lat, expected_lng, gcj_lat, gcj_lng = row
        result = eviltransform.gcj2wgs_exact(gcj_lat, gcj_lng)
        # Index rather than unpack, mirroring how the result is consumed.
        offset = eviltransform.distance(
            result[0], result[1], expected_lat, expected_lng)
        self.assertLess(offset, .5)