def process(self):
    """Load MagNet Bank branches and ATMs from the JSON file at ``self.link``.

    Every entry of the top-level ``result`` list that has a non-``None``
    ``address`` is copied into ``self.data`` and committed with
    ``self.data.add()``:

    * type ``'1'`` is stored as an ATM (``humagnatm``),
    * types ``'0'`` and ``'2'`` are stored as a bank branch
      (``humagnbank``) together with the e-mail from the record and the
      fixed phone number.

    Entries of any other type are logged and skipped, so they are never
    committed under a name/code that was not assigned for them.

    Nothing happens when ``self.link`` is falsy. Any exception is logged and
    swallowed; the method always returns ``None``.
    """
    try:
        if not self.link:
            return
        with open(self.link, 'r') as f:
            text = json.load(f)
        for poi_data in text['result']:
            address = poi_data.get('address')
            if address is None:
                # Records without an address are ignored entirely.
                continue
            poi_type = poi_data.get('type')
            if poi_type == '1':
                self.data.name = 'MagNet Bank ATM'
                self.data.code = 'humagnatm'
                self.data.public_holiday_open = True
            elif poi_type in ('0', '2'):
                self.data.name = 'MagNet Bank'
                self.data.code = 'humagnbank'
                self.data.public_holiday_open = False
                self.data.email = poi_data.get('email')
                self.data.phone = '+36 1 428 8888'
            else:
                logging.info('Unknow type! ({})'.format(poi_type))
                # No name/code was assigned for this record, so committing
                # it would store it under whatever self.data currently
                # holds. Skip it instead.
                continue
            (self.data.postcode, self.data.city, self.data.street,
             self.data.housenumber,
             self.data.conscriptionnumber) = extract_all_address(address)
            self.data.lat, self.data.lon = check_hu_boundary(
                poi_data.get('lat'), poi_data.get('lon'))
            self.data.original = address
            self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Import CIB locations from the JSON file at ``self.link``.

    Each item of the top-level ``results`` list is a mapping whose first key
    leads to the actual record (``address``, ``latitude``, ``longitude``).
    The record is stored as a ``hucibbank`` POI when ``self.name`` is
    ``'CIB bank'`` and as a ``hucibatm`` POI otherwise. The collected rows
    are turned into a DataFrame with ``POI_COLS`` columns and handed to
    ``insert_poi_dataframe``; an empty result only produces a warning.

    Does nothing when ``self.link`` is falsy.
    """
    if not self.link:
        return
    with open(self.link, 'r') as f:
        rows = []
        payload = json.load(f)
        for entry in payload['results']:
            key = next(iter(entry))
            name, code = (('CIB bank', 'hucibbank')
                          if self.name == 'CIB bank'
                          else ('CIB', 'hucibatm'))
            record = entry[key]
            address = record['address']
            postcode, city, street, housenumber, conscriptionnumber = \
                extract_all_address(address)
            lat, lon = check_hu_boundary(record['latitude'],
                                         record['longitude'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            # Column order follows the original row layout: branch and
            # website (before the raw address), ref/phone/email (before the
            # geometry) and the trailing nonstop flag plus the 14
            # opening/closing hour columns are all left as None.
            rows.append(
                [code, postcode, city, name, None, None, address, street,
                 housenumber, conscriptionnumber, None, None, None, geom]
                + [None] * 15)
        if rows:
            df = pd.DataFrame(rows)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
        else:
            logging.warning('Resultset is empty. Skipping ...')
def test_extract_all_address(self):
    """Check ``extract_all_address`` against every fixture in ``self.addresses``.

    Each fixture is a mapping holding the raw ``original`` address and the
    expected ``postcode``, ``city``, ``street``, ``housenumber`` and
    ``conscriptionnumber``. Every component is compared in its own sub-test
    labelled with the address and the component name, so one mismatch does
    not hide the others and the failure report says exactly what failed.
    """
    fields = ('postcode', 'city', 'street', 'housenumber',
              'conscriptionnumber')
    for fixture in self.addresses:
        original = fixture['original']
        expected = [fixture[field] for field in fields]
        # Unpack explicitly so a result of the wrong length still fails
        # loudly instead of being silently truncated by zip().
        postcode, city, street, housenumber, conscriptionnumber = \
            extract_all_address(original)
        actual = (postcode, city, street, housenumber, conscriptionnumber)
        for field, want, got in zip(fields, expected, actual):
            with self.subTest(original=original, field=field):
                self.assertEqual(want, got)
def process(self):
    """Import K&H Bank branches or ATMs from the JSON file at ``self.link``.

    Each item of the top-level ``results`` list is a mapping whose first key
    leads to the actual record (``latitude``, ``longitude``, ``address``).
    Records are collected in a fresh ``POIDataset``; ``self.name`` selects
    between the branch (``hukhbank``) and the ATM (``hukhatm``) variant, and
    only ATMs are marked as nonstop. A non-empty dataset is written with
    ``insert_poi_dataframe``; an empty one only produces a warning.

    Nothing happens when ``self.link`` is falsy. Any exception is logged
    together with the record being processed (``None`` when the failure
    happened before the first record) and then swallowed.
    """
    # Bound before the ``try`` so the error handler can always log it, even
    # when opening or parsing the file fails before the loop starts.
    poi_data = None
    try:
        if not self.link:
            return
        with open(self.link, 'r') as f:
            text = json.load(f)
        data = POIDataset()
        for poi_data in text['results']:
            first_element = next(iter(poi_data))
            record = poi_data.get(first_element)
            if self.name == 'K&H Bank':
                data.name = 'K&H Bank'
                data.code = 'hukhbank'
                data.public_holiday_open = False
            elif self.name == 'K&H Bank ATM':
                data.name = 'K&H Bank ATM'
                data.code = 'hukhatm'
                data.public_holiday_open = True
            data.nonstop = data.code == 'hukhatm'
            data.lat, data.lon = check_hu_boundary(
                record['latitude'], record['longitude'])
            address = record['address']
            if address is not None and address != '':
                (data.postcode, data.city, data.street, data.housenumber,
                 data.conscriptionnumber) = extract_all_address(address)
            data.original = address
            # NOTE(review): the phone number is looked up on the outer
            # mapping, not on the nested record like the other fields.
            # Confirm against the feed whether this is intended.
            phone = poi_data.get('phoneNumber')
            if phone is not None and phone != '':
                data.phone = clean_phone_to_str(phone)
            else:
                data.phone = None
            data.add()
        # ``lenght`` is the method name this code calls on POIDataset.
        if data is None or data.lenght() < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            insert_poi_dataframe(self.session, data.process())
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
        logging.error(poi_data)
def process(self):
    """Scrape Avia fuel stations from the downloaded page and store them.

    The page is fetched through ``save_downloaded_soup``. The JavaScript
    ``markers`` array embedded in one of its ``<script>`` tags is cut out
    with a regular expression, cleaned with ``clean_javascript_variable``
    and parsed as JSON. Every marker becomes one ``huaviafu`` row; the rows
    are turned into a DataFrame with ``POI_COLS`` columns and handed to
    ``insert_poi_dataframe``.

    Nothing happens when no page could be obtained. A page without a
    ``markers`` variable is reported with an error log entry, and an empty
    marker list with a warning.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    if soup is None:
        return
    # Raw string: same pattern as before, but without relying on Python
    # passing unknown escapes such as ``\s`` through unchanged (which
    # triggers invalid-escape warnings on current interpreters).
    pattern = re.compile(r'var\s*markers\s*=\s*((.*\n)*\]\;)', re.MULTILINE)
    script = soup.find('script', text=pattern)
    m = pattern.search(script.get_text()) if script is not None else None
    if m is None:
        # Previously this surfaced as an AttributeError on None.
        logging.error(
            'JavaScript variable "markers" was not found. Skipping ...')
        return
    # The markers use single-quoted strings; JSON requires double quotes.
    data = m.group(0).replace("'", '"')
    data = clean_javascript_variable(data, 'markers')
    text = json.loads(data)
    insert_data = []
    for poi_data in text:
        address = poi_data['cim']
        # Start every marker from a clean slate so a marker without an
        # address neither fails on unbound names nor inherits the address
        # parts of the previous marker.
        postcode = city = street = housenumber = conscriptionnumber = None
        if address is not None and address != '':
            postcode, city, street, housenumber, conscriptionnumber = \
                extract_all_address(address)
        if city is None:
            # Fall back to the marker title when no city could be parsed.
            city = poi_data['title']
        station_id = poi_data['kutid']
        has_id = station_id is not None and station_id != ''
        ref = station_id if has_id else None
        lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
        geom = check_geom(lat, lon)
        postcode = query_postcode_osm_external(
            self.prefer_osm_postcode, self.session, lat, lon, postcode)
        website = ('/toltoallomas/?id={}'.format(str(station_id))
                   if has_id else None)
        if 'tel' in poi_data and poi_data['tel'] != '':
            phone = clean_phone(poi_data['tel'])
        else:
            phone = None
        if 'email' in poi_data and poi_data['email'] != '':
            email = clean_email(poi_data['email'])
        else:
            email = None
        # Row layout: code, postcode, city, name, branch (None), website,
        # original address, street, housenumber, conscriptionnumber, ref,
        # phone, email, geom, then the nonstop flag and the 14
        # opening/closing hour columns, all left as None.
        insert_data.append(
            ['huaviafu', postcode, city, 'Avia', None, website, address,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom] + [None] * 15)
    if not insert_data:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Load Avia fuel stations from the downloaded page into ``self.data``.

    The page is fetched through ``save_downloaded_soup``; its JavaScript
    ``markers`` variable is extracted with ``extract_javascript_variable``
    and parsed as (non-strict) JSON. For every marker the identification,
    position, address, contact data and the fuel/service flags are set on
    ``self.data`` and committed with ``self.data.add()``.

    Service flags are ``True`` only when the marker carries the string
    ``'1'`` under the corresponding key.

    Nothing happens when no page could be obtained. Any exception is logged
    and swallowed; the method always returns ``None``.
    """

    def _enabled(record, *keys):
        # A feature is on when any of the given marker keys equals '1'.
        return any(record.get(key) == '1' for key in keys)

    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename),
            self.filetype)
        if soup is None:
            return
        text = json.loads(
            extract_javascript_variable(soup, 'markers', True),
            strict=False)
        for poi_data in text:
            self.data.name = 'Avia'
            self.data.code = 'huaviafu'
            station_id = poi_data['kutid']
            has_id = station_id is not None and station_id != ''
            self.data.ref = station_id if has_id else None
            self.data.lat, self.data.lon = check_hu_boundary(
                poi_data['lat'], poi_data['lng'])
            address = poi_data['cim']
            if address is not None and address != '':
                (self.data.postcode, self.data.city, self.data.street,
                 self.data.housenumber,
                 self.data.conscriptionnumber) = extract_all_address(address)
            # The title fallback has to run after the address was parsed;
            # doing it first let the parser overwrite it, leaving the city
            # empty whenever no city could be extracted from the address.
            if self.data.city is None:
                self.data.city = poi_data['title']
            self.data.website = (
                '/toltoallomas/?id={}'.format(str(station_id))
                if has_id else None)
            self.data.original = address
            if 'tel' in poi_data and poi_data['tel'] != '':
                self.data.phone = clean_phone_to_str(poi_data['tel'])
            else:
                self.data.phone = None
            if 'email' in poi_data and poi_data['email'] != '':
                self.data.email = clean_email(poi_data['email'])
            else:
                self.data.email = None
            self.data.public_holiday_open = False
            self.data.fuel_octane_95 = _enabled(poi_data, 'b95', 'b95g')
            self.data.fuel_diesel = _enabled(poi_data, 'dies', 'gdies')
            self.data.fuel_octane_98 = _enabled(poi_data, 'b98')
            self.data.fuel_lpg = _enabled(poi_data, 'lpg')
            self.data.fuel_e85 = _enabled(poi_data, 'e85')
            self.data.rent_lpg_bottles = _enabled(poi_data, 'pgaz')
            self.data.compressed_air = _enabled(poi_data, 'komp')
            self.data.restaurant = _enabled(poi_data, 'etterem')
            self.data.food = _enabled(poi_data, 'bufe')
            self.data.truck = _enabled(poi_data, 'kpark')
            self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)