def process(self):
    """Download the BENU pharmacy JSON feed and bulk-insert every pharmacy POI.

    Builds one row per pharmacy (wide schema matching POI_COLS) and inserts the
    whole resultset as a pandas DataFrame.
    """
    soup = save_downloaded_soup('{}'.format(self.link),
                                os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:  # was `soup != None`; compare to None with `is` (PEP 8)
        text = json.loads(soup.get_text())
        for poi_data in text:
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['street'])
            # Titles containing the chain name become branch entries of the chain
            if 'BENU Gyógyszertár' not in poi_data['title']:
                name = poi_data['title'].strip()
                branch = None
            else:
                name = 'Benu gyógyszertár'
                branch = poi_data['title'].strip()
            code = 'hubenupha'
            website = poi_data['description'].strip() if poi_data['description'] is not None else None
            # BUG FIX: the original executed `website = website[19:]` unconditionally,
            # raising TypeError whenever description was None. Guard the slice.
            if website is not None:
                website = website[19:]  # strip the fixed-length prefix of the description
            nonstop = None
            # No opening-hour data in this feed
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            city = clean_city(poi_data['city'])
            postcode = poi_data['postal_code'].strip()
            lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
            geom = check_geom(lat, lon)
            # Prefer OSM-derived postcode when configured
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = poi_data['street']
            ref = None
            if 'phone' in poi_data and poi_data['phone'] != '':
                phone = clean_phone(poi_data['phone'])
            else:
                phone = None
            email = None
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom, nonstop,
                mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
            ])
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Parse the locally cached MagNet Bank JSON and add bank/ATM POIs.

    Entry `type` '1' is an ATM; '0' and '2' are branches. Other types are
    skipped.
    """
    try:
        if self.link:
            with open(self.link, 'r') as f:
                text = json.load(f)
                for poi_data in text['result']:
                    if poi_data.get('address') is not None:
                        if poi_data.get('type') == '1':
                            self.data.name = 'MagNet Bank ATM'
                            self.data.code = 'humagnatm'
                            self.data.public_holiday_open = True
                        elif poi_data.get('type') in ['0', '2']:
                            self.data.name = 'MagNet Bank'
                            self.data.code = 'humagnbank'
                            self.data.public_holiday_open = False
                            self.data.email = poi_data.get('email')
                            self.data.phone = '+36 1 428 8888'
                        else:
                            # BUG FIX: unknown types previously fell through and
                            # were added with name/code left over from the previous
                            # iteration; skip them. (Also fixed 'Unknow' log typo.)
                            logging.info('Unknown type! ({})'.format(
                                poi_data.get('type')))
                            continue
                        self.data.postcode, self.data.city, self.data.street, self.data.housenumber, \
                            self.data.conscriptionnumber = extract_all_address(
                                poi_data.get('address'))
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('lon'))
                        self.data.original = poi_data.get('address')
                        self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the Penny market list and register every store as a POI."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            markets = json.loads(soup)['markets']
            for poi_data in markets:
                addr = poi_data['address']
                self.data.name = 'Penny'
                self.data.code = 'hupennysup'
                self.data.postcode = addr['zip'].strip()
                self.data.city = clean_city(addr['city'])
                self.data.original = addr['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    addr['latitude'], addr['longitude'])
                # Only the part before the first comma is a parseable street address
                street_part = addr['street'].split(',')[0]
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(street_part.title())
                if 'phone' in poi_data and poi_data['phone'] != '':
                    self.data.phone = clean_phone_to_str(poi_data['phone'])
                if 'id' in poi_data and poi_data['id'] != '':
                    self.data.ref = poi_data['id'].strip()
                self.data.public_holiday_open = False
                # TODO: Parsing opening_hours from datasource
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the Tom Market store JSON and register every store POI.

    Falls back to the chain website / reverse-geocoded city when the feed
    omits those fields.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text['stores']:
                try:
                    self.data.code = 'hutommacon'
                    # 'name' is a sequence; index 2 holds the store reference id
                    if poi_data.get('name')[2] is not None and poi_data.get('name')[2] != '':
                        self.data.ref = poi_data.get('name')[2]
                    if poi_data.get('website') is not None and poi_data.get('website') != '':
                        self.data.website = poi_data.get('website')
                    else:
                        self.data.website = 'https://tommarket.hu'
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('long'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('address'))
                    if poi_data.get('zip') is not None and poi_data.get('zip') != '':
                        self.data.postcode = poi_data.get('zip')
                    self.data.original = poi_data.get('address')
                    if poi_data.get('settlement') is not None and poi_data.get('settlement') != '':
                        self.data.city = clean_city(poi_data.get('settlement'))
                    else:
                        # No settlement in the feed: reverse-geocode from coordinates
                        self.data.city = query_osm_city_name_gpd(
                            self.session, self.data.lat, self.data.lon)
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    if poi_data.get('email') is not None and poi_data.get('email') != '':
                        # BUG FIX: the email address was assigned to self.data.phone,
                        # clobbering the phone number parsed just above.
                        self.data.email = poi_data.get('email').strip()
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Download the MOL Bubi station XML and bulk-insert every dock station."""
    xml = save_downloaded_xml('{}'.format(self.link),
                              os.path.join(self.download_cache, self.filename))
    insert_data = []
    root = etree.fromstring(xml)
    for e in root.iter('place'):
        name = 'MOL Bubi'
        code = 'hububibir'
        housenumber = None
        conscriptionnumber = None
        street = None
        city = 'Budapest'
        # Station names look like "<ref> - <branch name>"
        branch = e.attrib['name'].split('-')[1].strip() if e.attrib['name'] is not None else None
        # BUG FIX: ref was parsed here but then reassigned to None further below,
        # so it never reached the insert; the dead reassignment is removed.
        ref = e.attrib['name'].split('-')[0].strip() if e.attrib['name'] is not None else None
        # NOTE(review): capacity is parsed but not part of the insert row / POI_COLS,
        # so it is currently unused — confirm whether a capacity column should exist.
        capacity = e.attrib['bike_racks'].strip() if e.attrib['bike_racks'] is not None else None
        website = None
        nonstop = True  # bike docks are available 24/7
        mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
        mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
        lat, lon = check_hu_boundary(e.attrib['lat'].replace(',', '.'),
                                     e.attrib['lng'].replace(',', '.'))
        geom = check_geom(lat, lon)
        postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                               self.session, lat, lon, None)
        original = None
        phone = None
        email = None
        insert_data.append([
            code, postcode, city, name, branch, website, original, street,
            housenumber, conscriptionnumber, ref, phone, email, geom, nonstop,
            mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
            mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
        ])
    # BUG FIX: removed leftover debug `print(insert_data)`
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Load the cached CIB JSON file and bulk-insert bank/ATM rows."""
    if not self.link:
        return
    with open(self.link, 'r') as fh:
        payload = json.load(fh)
        rows = []
        for poi_data in payload['results']:
            # Each entry is a single-key dict wrapping the actual record
            key = next(iter(poi_data))
            record = poi_data[key]
            if self.name == 'CIB bank':
                name, code = 'CIB bank', 'hucibbank'
            else:
                name, code = 'CIB', 'hucibatm'
            postcode, city, street, housenumber, conscriptionnumber = \
                extract_all_address(record['address'])
            branch = None
            website = None
            nonstop = None
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            lat, lon = check_hu_boundary(record['latitude'], record['longitude'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = record['address']
            ref = None
            phone = None
            email = None
            rows.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom, nonstop,
                mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
            ])
        if len(rows) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(rows)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def process(self):
    """Download the OIL! fuel-station JSON feed and register each station."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is None:
            return
        stations = json.loads(soup)
        for poi_data in stations:
            try:
                self.data.name = 'OIL!'
                self.data.code = 'huoilfu'
                zip_code = poi_data.get('zip')
                if zip_code is not None and zip_code != '':
                    self.data.postcode = zip_code.strip()
                city = poi_data.get('city')
                if city is not None and city != '':
                    self.data.city = clean_city(city)
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data.get('lat'), poi_data.get('lng'))
                address = poi_data.get('address')
                if address is not None and address != '':
                    self.data.original = address
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(address)
                phone = poi_data.get('phone')
                if phone is not None and phone != '':
                    self.data.phone = clean_phone_to_str(phone)
                # All OIL! stations sell 95 petrol and diesel
                self.data.fuel_octane_95 = True
                self.data.fuel_diesel = True
                station_id = poi_data.get('id')
                if station_id is not None and station_id != '':
                    self.data.ref = station_id.strip()
                url = poi_data.get('url')
                if url is not None and url != '':
                    self.data.website = url.strip()
                else:
                    self.data.website = 'https://www.oil-benzinkutak.hu'
                store = poi_data.get('store')
                if store is not None and store != '':
                    # Branch is everything after the first word of the store label
                    self.data.branch = store.split(' ', 1)[1].strip().capitalize()
                self.data.add()
            except Exception as e:
                logging.error(e)
                logging.error(poi_data)
                logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Parse the CBA/Príma shop list embedded in a JS variable and add each shop."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            shops = json.loads(extract_javascript_variable(soup, 'boltok_nyers'))
            for poi_data in shops:
                self.data.city = clean_city(poi_data.get('A_VAROS'))
                self.data.postcode = poi_data.get('A_IRSZ').strip()
                self.data.branch = poi_data.get('P_NAME').strip()
                # Branch label decides which of the two chains this shop belongs to
                if 'Príma' in self.data.branch:
                    self.data.name = 'Príma'
                    self.data.code = 'huprimacon'
                else:
                    self.data.name = 'CBA'
                    self.data.code = 'hucbacon'
                # Feed day fields are 1-based: PS_OPEN_FROM_1 .. PS_OPEN_FROM_7
                for day in range(7):
                    opens = poi_data.get('PS_OPEN_FROM_{}'.format(day + 1))
                    closes = poi_data.get('PS_OPEN_TO_{}'.format(day + 1))
                    self.data.day_open(
                        day, clean_opening_hours_2(opens) if opens is not None else None)
                    self.data.day_close(
                        day, clean_opening_hours_2(closes) if closes is not None else None)
                self.data.original = poi_data.get('A_CIM')
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data.get('PS_GPS_COORDS_LAT'),
                    poi_data.get('PS_GPS_COORDS_LNG'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                if 'PS_PUBLIC_TEL' in poi_data and poi_data.get('PS_PUBLIC_TEL') != '':
                    self.data.phone = clean_phone_to_str(poi_data.get('PS_PUBLIC_TEL'))
                else:
                    self.data.phone = None
                if 'PS_PUBLIC_EMAIL' in poi_data and poi_data.get('PS_PUBLIC_EMAIL') != '':
                    self.data.email = poi_data.get('PS_PUBLIC_EMAIL')
                else:
                    self.data.email = None
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the Yves Rocher store-locator JSON and add the Hungarian shops.

    Only entries with country_id == 3 are processed; all other countries are
    skipped. Landline and mobile numbers are merged into one ';'-separated
    phone field.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache,
                                                 self.filename), self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text.get('list'):
                try:
                    if poi_data.get('country_id') != 3:
                        # Not a Hungarian store — skip
                        continue
                    else:
                        self.data.name = 'Yves Rocher'
                        self.data.code = 'huyvesrcos'
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(poi_data.get(
                                'latitude'), poi_data.get('longitude'))
                        self.data.website = 'https://www.yves-rocher.hu{}/'.format(
                            poi_data.get('request_path'))
                        opening = poi_data.get('hours')
                        # NOTE(review): assumes 'hours' keys are ints 0..6 — if the
                        # JSON yields string keys, no day ever matches; confirm.
                        for i in range(0, 7):
                            if i in opening:
                                self.data.day_open(
                                    i, opening[i]['hour_from'])
                                self.data.day_close(
                                    i, opening[i]['hour_to'])
                        self.data.postcode = poi_data.get('zip')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                poi_data.get('address'))
                        self.data.city = clean_city(poi_data.get('city'))
                        self.data.original = poi_data.get('address')
                        if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('phone'))
                        # Append mobile to landline when both exist, otherwise use
                        # the mobile number alone
                        if poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is not None:
                            self.data.phone = '{};{}'.format(self.data.phone,
                                                             clean_phone_to_str(poi_data.get('mobile')))
                        elif poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is None:
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('mobile'))
                        self.data.public_holiday_open = False
                        self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Download the MOL fuel-station JSON (POST request) and bulk-insert stations."""
    soup = save_downloaded_soup('{}'.format(self.link),
                                os.path.join(self.download_cache, self.filename),
                                POST_DATA)
    insert_data = []
    if soup is not None:  # was `soup != None`; compare to None with `is` (PEP 8)
        text = json.loads(soup.get_text())
        for poi_data in text:
            name = 'MOL'
            code = 'humolfu'
            postcode = poi_data['postcode'].strip()
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            city = clean_city(poi_data['city'])
            branch = None
            website = None
            nonstop = None
            # No opening-hour data in this feed
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            original = poi_data['address']
            ref = None
            lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
            geom = check_geom(lat, lon)
            # Prefer OSM-derived postcode when configured
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            phone = None
            email = None
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom, nonstop,
                mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
            ])
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Download the Pepco store JSON and add the Hungarian stores.

    The feed is Europe-wide; entries whose city is unknown to the local OSM
    extract are skipped.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text['data']:
                '''
                The Pepco dataset contains all European data. Since the program cannot handle
                POIs outside Hungary (so far) this will limit only for Hungarian POIs
                In fact this depends on OSM extract but currently we use only Hungarian OSM extract
                Select only Hungarian POIs
                '''
                if 'city' in poi_data and (
                        poi_data['city'] == '' or query_osm_city_name(
                            self.session, poi_data['city']) is None):
                    continue
                elif 'city' in poi_data:
                    self.data.city = clean_city(poi_data['city'])
                else:
                    continue
                self.data.name = 'Pepco'
                self.data.code = 'hupepcoclo'
                self.data.lat, self.data.lon = \
                    check_hu_boundary(poi_data['coordinates']['lat'],
                                      poi_data['coordinates']['lng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('streetAddress'))
                self.data.original = poi_data.get('streetAddress')
                self.data.postcode = poi_data.get('postalCode')
                # self.data.city = query_osm_city_name_gpd(self.session, self.data.lat, self.data.lon)
                # NOTE(review): assumes 'openingHours' keys are ints 0..6 — confirm
                opening = poi_data['openingHours']
                for i in range(0, 7):
                    if i in opening:
                        self.data.day_open(i, opening[i]['from'])
                        self.data.day_close(i, opening[i]['to'])
                self.data.phone = clean_phone_to_str(poi_data.get('phoneNumber'))
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        # BUG FIX: was `logging.error(logging.error(e))` — the inner call returns
        # None, so the exception text was logged once and then `None` was logged.
        logging.error(e)
def process(self):
    """Parse the cached Kulcs patika JSON and add each pharmacy POI.

    The cached payload is double-encoded: the file contains a JSON string that
    itself holds the JSON document, hence the json.load followed by json.loads.
    """
    try:
        if self.link:
            # soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache,
            # self.filename), self.post, self.verify_link, headers=self.headers)
            with open(os.path.join(self.download_cache, self.filename),
                      'r') as f:
                text = json.load(f)
                if text is not None:
                    # Second decode of the double-encoded payload
                    text = json.loads(text, strict=False)
                    for poi_data in text:
                        try:
                            # Names containing the chain name become branches
                            if 'Kulcs patika' not in poi_data.get('nev'):
                                self.data.name = poi_data.get(
                                    'nev').strip()
                                self.data.branch = None
                            else:
                                self.data.name = 'Kulcs patika'
                                self.data.branch = poi_data.get(
                                    'nev').strip()
                            self.data.code = 'hukulcspha'
                            if poi_data.get('link') is not None and poi_data.get('link') != '':
                                # NOTE(review): the inner conditional is redundant —
                                # the outer `if` already guarantees link is not None
                                self.data.website = poi_data.get('link').strip() if poi_data.get('link') \
                                    is not None else None
                            if poi_data.get('helyseg') is not None and poi_data.get('helyseg') != '':
                                self.data.city = clean_city(
                                    poi_data.get('helyseg'))
                            self.data.lat, self.data.lon = \
                                check_hu_boundary(poi_data.get('marker_position')['latitude'],
                                                  poi_data.get('marker_position')['longitude'])
                            if poi_data.get('cim') is not None and poi_data.get('cim') != '':
                                self.data.original = poi_data.get('cim')
                                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                    extract_street_housenumber_better_2(
                                        poi_data.get('cim'))
                            if poi_data.get('irsz') is not None and poi_data.get('irsz') != '':
                                self.data.postcode = poi_data.get(
                                    'irsz').strip()
                            self.data.public_holiday_open = False
                            self.data.add()
                        except Exception as e:
                            logging.error(e)
                            logging.error(poi_data)
                            logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Parse Mobil Petrol stations from an embedded JS variable and add them.

    Opening hours are inferred from fixed markers ('0-24', '6-22', …) in the
    free-text 'services' field.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache,
                                                 self.filename), self.filetype)
        if soup is not None:
            text = json.loads(
                extract_javascript_variable(soup, 'totem_stations'))
            for poi_data in text.values():
                self.data.name = 'Mobil Petrol'
                self.data.code = 'humobpefu'
                self.data.website = poi_data.get('description')
                self.data.city = clean_city(poi_data.get('city'))
                self.data.original = poi_data.get('address')
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['location']['lat'], poi_data['location']['lng'])
                self.data.postcode = None
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('address'))
                self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                self.data.public_holiday_open = False
                if '0-24' in poi_data.get('services'):
                    self.data.nonstop = True
                    self.data.public_holiday_open = True
                else:
                    # BUG FIX: the original checked `'open_from' in locals()`, so a
                    # station with no recognized marker silently reused the opening
                    # hours left over from a previous loop iteration. Reset per item.
                    open_from = None
                    open_to = None
                    if '6-22' in poi_data.get('services'):
                        open_from, open_to = '06:00', '22:00'
                    elif '6-21' in poi_data.get('services'):
                        open_from, open_to = '06:00', '21:00'
                    elif '5-22' in poi_data.get('services'):
                        open_from, open_to = '05:00', '22:00'
                    elif '6-18' in poi_data.get('services'):
                        open_from, open_to = '06:00', '18:00'
                    if open_from is not None and open_to is not None:
                        for i in range(0, 7):
                            self.data.day_open(i, open_from)
                            self.data.day_close(i, open_to)
                    self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Load the cached K&H JSON file and bulk-insert bank/ATM POIs.

    The scraper instance's own name (self.name) decides whether rows are
    branches or ATMs; ATMs are marked nonstop and open on public holidays.
    """
    try:
        if self.link:
            with open(self.link, 'r') as f:
                text = json.load(f)
                data = POIDataset()
                for poi_data in text['results']:
                    # Each entry is a single-key dict wrapping the actual record
                    first_element = next(iter(poi_data))
                    if self.name == 'K&H Bank':
                        data.name = 'K&H Bank'
                        data.code = 'hukhbank'
                        data.public_holiday_open = False
                    elif self.name == 'K&H Bank ATM':
                        data.name = 'K&H Bank ATM'
                        data.code = 'hukhatm'
                        data.public_holiday_open = True
                    if data.code == 'hukhatm':
                        data.nonstop = True
                    else:
                        data.nonstop = False
                    data.lat, data.lon = check_hu_boundary(
                        poi_data.get(first_element)['latitude'],
                        poi_data.get(first_element)['longitude'])
                    if poi_data.get(first_element)['address'] is not None and \
                            poi_data.get(first_element)['address'] != '':
                        data.postcode, data.city, data.street, data.housenumber, data.conscriptionnumber = \
                            extract_all_address(
                                poi_data.get(first_element)['address'])
                        data.original = poi_data.get(
                            first_element)['address']
                    # NOTE(review): 'phoneNumber' is looked up on the wrapper dict,
                    # not on poi_data[first_element] like the other fields — confirm
                    # this matches the feed's actual structure.
                    if poi_data.get(
                            'phoneNumber'
                    ) is not None and poi_data.get('phoneNumber') != '':
                        data.phone = clean_phone_to_str(
                            poi_data.get('phoneNumber'))
                    else:
                        data.phone = None
                    data.add()
                # `lenght()` is the (misspelled) POIDataset row-count API
                if data is None or data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
        logging.error(poi_data)
def process(self):
    """Download the national tobacco shop list (POST request) and add each shop."""
    soup = save_downloaded_soup('{}'.format(self.link),
                                os.path.join(self.download_cache, self.filename),
                                self.filetype, POST_DATA)
    if soup is None:
        return
    shops = json.loads(soup)
    for poi_data in shops:
        self.data.name = 'Nemzeti Dohánybolt'
        self.data.code = 'hunemdotob'
        self.data.postcode = poi_data.get('postcode').strip()
        self.data.city = clean_city(poi_data['city'])
        self.data.original = poi_data['address']
        self.data.lat, self.data.lon = check_hu_boundary(
            poi_data['lat'], poi_data['lng'])
        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
            extract_street_housenumber_better_2(poi_data['address'])
        self.data.public_holiday_open = False
        self.data.add()
def process(self):
    """Download the Spar store JSON, classify each store into a Spar sub-brand
    and add it as a POI."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            stores = json.loads(soup)
            # Ordered markers: first match in the store name wins
            brand_markers = (
                ('xpres', 'Spar Expressz', 'husparecon'),
                ('INTER', 'Interspar', 'husparisup'),
                ('market', 'Spar', 'husparsup'),
                ('DESPAR', 'DeSpar', 'huspardcon'),
            )
            for poi_data in stores:
                # Default when no marker matches
                self.data.name = 'Spar'
                self.data.code = 'husparsup'
                for marker, brand, brand_code in brand_markers:
                    if marker in poi_data['name']:
                        self.data.name = brand
                        self.data.code = brand_code
                        break
                # Normalize the all-caps brand spellings before further parsing
                poi_data['name'] = poi_data['name'].replace('INTERSPAR', 'Interspar')
                poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
                ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
                self.data.ref = ref_match.group(1).strip() if ref_match is not None else None
                self.data.city = clean_city(poi_data['city'])
                self.data.postcode = poi_data.get('zipCode').strip()
                self.data.branch = poi_data['name'].split('(')[0].strip()
                self.data.website = poi_data['pageUrl'].strip()
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['latitude'], poi_data['longitude'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['address'])
                self.data.original = poi_data['address']
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the MOL fuel-station JSON (POST request) and add each station
    together with its service flags parsed from 'servicesin'."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache,
                                                 self.filename), self.filetype,
                                    POST_DATA)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text:
                self.data.name = 'MOL'
                # Motorway-island ("Sziget") stations get a separate code
                if " Sziget " in poi_data.get('name'):
                    self.data.code = 'humolwfu'
                else:
                    self.data.code = 'humolfu'
                self.data.postcode = poi_data.get('postcode').strip()
                self.data.city = clean_city(poi_data['city'])
                self.data.original = poi_data['address']
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['lat'], poi_data['lng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['address'])
                # Service flags: membership tests already yield booleans, so the
                # original `True if x else False` ternaries are simplified away.
                services = poi_data.get('servicesin')
                self.data.truck = 'kamion_parkolo' in services
                self.data.food = 'fresh_corner' in services
                self.data.rent_lpg_bottles = 'pb' in services
                self.data.fuel_adblue = 'adblue' in services
                self.data.fuel_lpg = 'lpg' in services
                # All MOL stations carry these fuels/services
                self.data.fuel_octane_95 = True
                self.data.fuel_diesel = True
                self.data.fuel_octane_100 = True
                self.data.fuel_diesel_gtl = True
                self.data.compressed_air = True
                # BUG FIX: public_holiday_open was set to True earlier in the loop
                # and then unconditionally overwritten with False; the contradictory
                # dead assignment is removed, keeping the effective value (False).
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Read the Mobiliti charging-point CSV export and add each charger POI."""
    try:
        table = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
        if table is not None:
            for poi_data in table.to_dict('records'):
                self.data.name = 'Mobiliti'
                self.data.code = 'humobilchs'
                self.data.ref = poi_data.get('Mobiliti azonosító')
                self.data.branch = poi_data.get('Töltőpont neve')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                coords = poi_data.get('GPS koordináták')
                if coords is None:
                    # No coordinates — row cannot be placed on the map, skip it
                    continue
                self.data.lat, self.data.lon = coords.split(',')
                self.data.lat, self.data.lon = check_hu_boundary(
                    self.data.lat, self.data.lon)
                # Socket counts and power ratings per connector type
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get('Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get('Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get('Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get('Teljesítmény (Type 2)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Load the cached CIB locations JSON and bulk-insert in-service
    bank/ATM POIs."""
    try:
        if self.link:
            with open(self.link, 'r') as fh:
                text = json.load(fh)
                data = POIDataset()
                for poi_data in text['availableLocations']:
                    # Only locations currently in service are imported
                    if 'locationStatus' not in poi_data or \
                            poi_data['locationStatus'] != 'IN_SERVICE':
                        continue
                    if self.name == 'CIB Bank':
                        data.name = 'CIB Bank'
                        data.code = 'hucibbank'
                        data.public_holiday_open = False
                    else:
                        data.name = 'CIB Bank ATM'
                        data.code = 'hucibatm'
                        data.public_holiday_open = True
                    data.lat, data.lon = check_hu_boundary(
                        poi_data['location']['lat'],
                        poi_data['location']['lon'])
                    data.city = clean_city(poi_data['city'])
                    data.postcode = poi_data.get('zip').strip()
                    data.housenumber = poi_data['streetNo'].strip()
                    data.street = poi_data['streetName'].strip()
                    data.branch = poi_data['name']
                    if 'phone' in poi_data and poi_data['phone'] != '':
                        data.phone = clean_phone_to_str(poi_data['phone'])
                    if 'email' in poi_data and poi_data['email'] != '':
                        data.email = poi_data['email'].strip()
                    data.original = poi_data['fullAddress']
                    data.add()
                if data is None or data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
        logging.error(poi_data)
def process(self):
    """Download the BENU pharmacy JSON and add each pharmacy POI."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            pharmacies = json.loads(str(soup))
            for poi_data in pharmacies:
                try:
                    title = poi_data.get('title')
                    # Titles containing the chain name become branch entries
                    if 'BENU Gyógyszertár' in title:
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = title.strip()
                    else:
                        self.data.name = title.strip()
                        self.data.branch = None
                    self.data.code = 'hubenupha'
                    description = poi_data.get('description')
                    if description is not None:
                        # The description starts with the pharmacy's URL
                        pu_match = PATTERN_FULL_URL.match(description)
                        self.data.website = pu_match.group(0).strip() \
                            if pu_match is not None else None
                    else:
                        self.data.website = None
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.postcode = poi_data.get('postal_code').strip()
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('street'))
                    self.data.original = poi_data.get('street')
                    if 'phone' in poi_data and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    else:
                        self.data.phone = None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Read the MOL Plugee charging-point CSV export and add each charger POI."""
    try:
        table = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
        if table is not None:
            for poi_data in table.to_dict('records'):
                self.data.name = 'MOL Plugee'
                self.data.code = 'humolplchs'
                self.data.ref = poi_data.get('Azonosító')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                # Coordinates use decimal commas in the export
                raw_lat = poi_data.get('X')
                raw_lon = poi_data.get('Y')
                self.data.lat, self.data.lon = check_hu_boundary(
                    raw_lat.replace(',', '.'), raw_lon.replace(',', '.'))
                # Socket counts and power ratings per connector type
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get('Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get('Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get('Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get('Teljesítmény (Type 2)')
                self.data.socket_type2 = poi_data.get('Darab (Type 2 – kábel nélkül)')
                self.data.socket_type2_output = poi_data.get('Teljesítmény (Type 2 – kábel nélkül)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the Rossmann shop list from an embedded JS variable and add each shop."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype, None, self.verify_link)
        if soup is not None:
            locations = json.loads(extract_javascript_variable(soup, 'locations'))
            for poi_data in locations:
                # Only the first address entry of each location is used
                poi_data = poi_data['addresses'][0]
                self.data.name = 'Rossmann'
                self.data.code = 'hurossmche'
                self.data.city = clean_city(poi_data['city'])
                self.data.postcode = poi_data.get('zip').strip()
                for day in range(7):
                    day_key = WeekDaysLong(day).name.lower()
                    hours = poi_data['business_hours'][day_key]
                    if hours is not None:
                        opens, closes = clean_opening_hours(hours)
                        self.data.day_open_close(day, opens, closes)
                    else:
                        self.data.day_open_close(day, None, None)
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['position'][0], poi_data['position'][1])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['address'])
                self.data.original = poi_data['address']
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the OBI store list JSON and add each DIY store POI."""
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is not None:
            stores = json.loads(soup).get('stores')
            for poi_data in stores:
                addr = poi_data['address']
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                self.data.postcode = addr['zip'].strip()
                self.data.city = clean_city(addr['city'])
                self.data.original = addr['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    addr['lat'], addr['lon'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(addr['street'])
                if 'phone' in poi_data and poi_data.get('phone') != '':
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                if 'storeNumber' in poi_data and poi_data.get('storeNumber') != '':
                    self.data.ref = poi_data.get('storeNumber').strip()
                if 'email' in poi_data and poi_data.get('email') != '':
                    self.data.email = clean_email(poi_data.get('email'))
                if 'path' in poi_data and poi_data.get('path') != '':
                    self.data.website = poi_data.get('path')
                # TODO: opening hour parser for poi_data.get('hours'), format is like:
                # Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Scrape the Jysk store locator (Drupal settings JSON) and add each store."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # Store data lives in the page's Drupal settings JSON blob.
            soup_data = soup.find(
                'script', {'data-drupal-selector': 'drupal-settings-json'})
            json_data = json.loads(soup_data.text, strict=False)
            for shop in json_data['storesLocator']['BuildCoordinates']:
                self.data.name = 'Jysk'
                self.data.code = 'hujyskfur'
                self.data.lat, self.data.lon = check_hu_boundary(
                    shop.get('lat'), shop.get('lon'))
                self.data.branch = shop.get('name')
                internal_id = shop.get('id')
                # Prefetch the per-store detail page into the download cache.
                # The parsed result was previously bound to an unused local
                # ('shop_soup'); the call is kept only for its caching side
                # effect. TODO: parse the detail page (opening hours?) or
                # drop this fetch entirely.
                save_downloaded_soup(
                    '{}?storeId={}'.format(self.link, internal_id),
                    os.path.join(self.download_cache,
                                 '{}.{}.html'.format(self.filename, internal_id)))
                # Central phone number is used for every branch.
                self.data.phone = '+36 1 700 8400'
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Download the Foxpost parcel locker JSON and add each locker as a POI."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text:
                self.data.name = 'Foxpost'
                self.data.code = 'hufoxpocso'
                self.data.postcode = poi_data['zip'].strip()
                self.data.city = clean_city(poi_data['city'])
                self.data.branch = poi_data['name']
                for i in range(7):
                    # Hoist the day-name lookup; it was computed twice per day.
                    day_name = WeekDaysLongHUUnAccented(i).name.lower()
                    hours = poi_data['open'][day_name]
                    if hours is not None:
                        opening, closing = clean_opening_hours(hours)
                        # Consistency fix: use the combined setter — as the
                        # else branch below and every sibling dataprovider do —
                        # instead of separate day_open()/day_close() calls.
                        self.data.day_open_close(i, opening, closing)
                    else:
                        self.data.day_open_close(i, None, None)
                self.data.original = poi_data['address']
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['geolat'], poi_data['geolng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['street'])
                self.data.public_holiday_open = False
                # 'findme' holds human-readable locator hints for the machine.
                self.data.description = poi_data.get('findme')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the MOL Bubi bike-share station feed and add every docking station."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        for pla in soup.findAll('place'):
            try:
                self.data.name = 'MOL Bubi'
                self.data.code = 'hububibir'
                self.data.city = 'Budapest'
                station_name = pla.get('name')
                if station_name is not None and station_name != '':
                    # Station names look like '<ref> - <branch>'. The inner
                    # None-ternaries of the original were redundant inside
                    # this already-guarded branch and have been removed.
                    self.data.branch = station_name.split('-')[1].strip()
                    self.data.ref = station_name.split('-')[0].strip()
                self.data.nonstop = True
                # Coordinates arrive with a decimal comma.
                self.data.lat, self.data.lon = check_hu_boundary(
                    pla.get('lat').replace(',', '.'),
                    pla.get('lng').replace(',', '.'))
                # BUGFIX: lat/lon were passed in swapped order (lon, lat);
                # every other call site in this file passes (lat, lon).
                self.data.postcode = query_postcode_osm_external(
                    True, self.session, self.data.lat, self.data.lon, None)
                self.data.public_holiday_open = True
                self.data.add()
            except Exception as e:
                # Log and keep going: one malformed station must not abort the run.
                logging.error(e)
                logging.error(pla)
                logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
        logging.error(soup)
def process(self):
    """Parse the Magyar Posta XML feed and insert every service point as a POI."""
    xml = save_downloaded_xml(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    root = etree.fromstring(xml)
    for e in root.findall('post'):
        # Map the service point type to a display name and internal code.
        service_point_type = e.find('ServicePointType').text
        if service_point_type == 'PM':
            name = 'Posta'
            code = 'hupostapo'
        elif service_point_type == 'CS':
            name = 'Posta csomagautomata'
            code = 'hupostacso'
        elif service_point_type == 'PP':
            name = 'PostaPont'
            code = 'hupostapp'
        else:
            # BUGFIX: previously this branch only logged and fell through,
            # reusing name/code from the previous iteration (or raising
            # NameError on the very first one). Skip unknown types instead.
            logging.error('Non existing Posta type.')
            continue
        postcode = e.get('zipCode')
        # Street is split into a base name and a type suffix ('utca', 'tér', …);
        # either part may be missing. e.find() results are hoisted so each
        # element is looked up once instead of twice.
        street_name_node = e.find('street/name')
        street_type_node = e.find('street/type')
        street_tmp_1 = street_name_node.text.strip() if street_name_node.text is not None else None
        street_tmp_2 = street_type_node.text.strip() if street_type_node.text is not None else None
        if street_tmp_1 is None:
            street = None
        elif street_tmp_2 is None:
            street = street_tmp_1
        else:
            street = '{} {}'.format(street_tmp_1, street_tmp_2)
        house_number_node = e.find('street/houseNumber')
        housenumber = house_number_node.text.strip().lower() \
            if house_number_node.text is not None else None
        conscriptionnumber = None
        city = clean_city(e.find('city').text)
        branch = e.find('name').text if e.find('name').text is not None else None
        website = None
        # No opening-hour information in the feed.
        nonstop = None
        mo_o = tu_o = we_o = th_o = fr_o = sa_o = su_o = None
        mo_c = tu_c = we_c = th_c = fr_c = sa_c = su_c = None
        # Coordinates arrive with a decimal comma.
        lat, lon = check_hu_boundary(
            e.find('gpsData/WGSLat').text.replace(',', '.'),
            e.find('gpsData/WGSLon').text.replace(',', '.'))
        geom = check_geom(lat, lon)
        postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                               self.session, lat, lon, postcode)
        original = None
        ref = None
        phone = None
        email = None
        insert_data.append([
            code, postcode, city, name, branch, website, original, street,
            housenumber, conscriptionnumber, ref, phone, email, geom,
            nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
            mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
        ])
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Download the Rossmann store locator page and bulk-insert every store.

    The store list is embedded in the page as a JavaScript variable named
    'places'; each entry carries its shops under 'addresses'.
    """
    soup = save_downloaded_soup('{}'.format(self.link),
                                os.path.join(self.download_cache, self.filename),
                                None, self.verify_link)
    insert_data = []
    if soup != None:
        # Extract the 'var places = ...' JavaScript assignment from the page
        # and strip it down to the raw JSON payload.
        # NOTE(review): the pattern is a non-raw string; '\s' works today but
        # a raw string literal would be the safe spelling.
        pattern = re.compile('^\s*var\s*places.*')
        script = soup.find('script', text=pattern)
        m = pattern.match(script.get_text())
        data = m.group(0)
        data = clean_javascript_variable(data, 'places')
        text = json.loads(data)
        for poi_data in text:
            # Only the first address entry of each location is used.
            poi_data = poi_data['addresses'][0]
            # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            name = 'Rossmann'
            code = 'hurossmche'
            city = clean_city(poi_data['city'])
            postcode = poi_data['zip'].strip()
            branch = None
            website = None
            nonstop = False
            # NOTE(review): the 'tuesday' value is stored in th_o/th_c and the
            # 'thursday' value in tu_o/tu_c — the names look swapped. The
            # insert list below appends them in the order mo, th, we, tu,
            # which is the same order used by every other dataprovider in
            # this file, so the data presumably still lands in the correct
            # POI_COLS columns. Verify against POI_COLS before renaming.
            if poi_data['business_hours']['monday'] is not None:
                mo_o, mo_c = clean_opening_hours(poi_data['business_hours']['monday'])
            else:
                mo_o, mo_c = None, None
            if poi_data['business_hours']['tuesday'] is not None:
                th_o, th_c = clean_opening_hours(poi_data['business_hours']['tuesday'])
            else:
                th_o, th_c = None, None
            if poi_data['business_hours']['wednesday'] is not None:
                we_o, we_c = clean_opening_hours(poi_data['business_hours']['wednesday'])
            else:
                we_o, we_c = None, None
            if poi_data['business_hours']['thursday'] is not None:
                tu_o, tu_c = clean_opening_hours(poi_data['business_hours']['thursday'])
            else:
                tu_o, tu_c = None, None
            if poi_data['business_hours']['friday'] is not None:
                fr_o, fr_c = clean_opening_hours(poi_data['business_hours']['friday'])
            else:
                fr_o, fr_c = None, None
            if poi_data['business_hours']['saturday'] is not None:
                sa_o, sa_c = clean_opening_hours(poi_data['business_hours']['saturday'])
            else:
                sa_o, sa_c = None, None
            if poi_data['business_hours']['sunday'] is not None:
                su_o, su_c = clean_opening_hours(poi_data['business_hours']['sunday'])
            else:
                su_o, su_c = None, None
            lat, lon = check_hu_boundary(poi_data['position'][0],
                                         poi_data['position'][1])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                   self.session, lat, lon, postcode)
            original = poi_data['address']
            ref = None
            phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original, street,
                 housenumber, conscriptionnumber, ref, phone, email, geom,
                 nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c])
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Fetch the OMV fuel station list (POST request) and insert all stations."""
    page = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename), POST_DATA)
    rows = []
    if page is not None:
        payload = json.loads(page.get_text())
        for station in payload['results']:
            name = 'OMV'
            code = 'huomvfu'
            postcode = station['postcode'].strip()
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(station['address_l'])
            city = clean_city(station['town_l'])
            branch = None
            website = None
            nonstop = None
            # A single opening interval applies to every day of the week.
            open_at, close_at = None, None
            if station['open_hours'] is not None:
                open_at, close_at = clean_opening_hours(station['open_hours'])
                if open_at == '00:00' and close_at == '24:00':
                    # 00:00-24:00 is reported as a nonstop station instead.
                    nonstop = True
                    open_at, close_at = None, None
            mo_o = tu_o = we_o = th_o = fr_o = sa_o = su_o = open_at
            mo_c = tu_c = we_c = th_c = fr_c = sa_c = su_c = close_at
            original = station['address_l']
            ref = None
            lat, lon = check_hu_boundary(station['y'], station['x'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            if 'telnr' in station and station['telnr'] != '':
                phone = clean_phone(station['telnr'])
            else:
                phone = None
            email = None
            rows.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c])
    if not rows:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(rows)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Fetch the Spar store JSON feed and insert every store into the POI database."""
    page = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    rows = []
    if page is not None:
        stores = json.loads(page.get_text())
        for store in stores:
            # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(store['address'])
            # Classify the store sub-brand from its raw name.
            raw_name = store['name']
            if 'xpres' in raw_name:
                name, code = 'Spar Expressz', 'husparexp'
            elif 'INTER' in raw_name:
                name, code = 'Interspar', 'husparint'
            else:
                # Covers both 'market' stores and every other variant; the
                # original's separate 'market' branch assigned the same values.
                name, code = 'Spar', 'husparsup'
            # Normalize brand casing before extracting the branch/ref parts.
            store['name'] = store['name'].replace('INTERSPAR', 'Interspar') \
                                         .replace('SPAR', 'Spar')
            ref_match = PATTERN_SPAR_REF.search(store['name'])
            ref = ref_match.group(1).strip() if ref_match is not None else None
            city = clean_city(store['city'])
            postcode = store['zipCode'].strip()
            branch = store['name'].split('(')[0].strip()
            website = store['pageUrl'].strip()
            # No opening-hour information in the feed.
            nonstop = None
            mo_o = tu_o = we_o = th_o = fr_o = sa_o = su_o = None
            mo_c = tu_c = we_c = th_c = fr_c = sa_c = su_c = None
            lat, lon = check_hu_boundary(store['latitude'], store['longitude'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = store['address']
            phone = None
            email = None
            rows.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c])
    if not rows:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(rows)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)