def process(self):
    """Download and parse the BENU pharmacy JSON feed and bulk-insert the POIs.

    Each record yields one row matching POI_COLS; rows are inserted via
    insert_poi_dataframe(). Raises nothing itself; network/parse errors
    propagate to the caller.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:  # was `soup != None`; identity check is the Python idiom
        text = json.loads(soup.get_text())
        for poi_data in text:
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['street'])
            # Non-branded titles keep their own name; branded ones become the
            # canonical chain name with the full title kept as the branch.
            if 'BENU Gyógyszertár' not in poi_data['title']:
                name = poi_data['title'].strip()
                branch = None
            else:
                name = 'Benu gyógyszertár'
                branch = poi_data['title'].strip()
            code = 'hubenupha'
            website = poi_data['description'].strip(
            ) if poi_data['description'] is not None else None
            # Strip the fixed URL prefix embedded in the description.
            # Guarded: the original sliced unconditionally and raised
            # TypeError whenever description was None.
            website = website[19:] if website is not None else None
            # No opening-hours data in this feed.
            nonstop = None
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            city = clean_city(poi_data['city'])
            postcode = poi_data['postal_code'].strip()
            lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
            geom = check_geom(lat, lon)
            # Prefer the OSM-derived postcode when configured to do so.
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = poi_data['street']
            ref = None
            if 'phone' in poi_data and poi_data['phone'] != '':
                phone = clean_phone(poi_data['phone'])
            else:
                phone = None
            email = None
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
                we_c, tu_c, fr_c, sa_c, su_c
            ])
        if len(insert_data) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def process(self):
    """Parse the Tom Market store JSON and add each POI to self.data.

    Errors inside a single POI are logged and that POI is skipped; errors
    around download/parse abort the whole run (logged, not re-raised).
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            # script = soup.find('div', attrs={'data-stores':True})
            text = json.loads(str(soup))
            for poi_data in text['stores']:
                try:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                    # conscriptionnumber, ref, geom
                    self.data.code = 'hutommacon'
                    # NOTE(review): 'name'[2] indexes the third character of the
                    # name field as the reference — looks intentional upstream,
                    # but verify against the feed format.
                    if poi_data.get(
                            'name')[2] is not None and poi_data.get('name')[2] != '':
                        self.data.ref = poi_data.get('name')[2]
                    if poi_data.get(
                            'website') is not None and poi_data.get('website') != '':
                        self.data.website = poi_data.get('website')
                    else:
                        self.data.website = 'https://tommarket.hu'
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('long'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('address'))
                    if poi_data.get('zip') is not None and poi_data.get('zip') != '':
                        self.data.postcode = poi_data.get('zip')
                    self.data.original = poi_data.get('address')
                    if poi_data.get(
                            'settlement') is not None and poi_data.get('settlement') != '':
                        self.data.city = clean_city(poi_data.get('settlement'))
                    else:
                        # Fall back to reverse-geocoding the city from OSM data.
                        self.data.city = query_osm_city_name_gpd(
                            self.session, self.data.lat, self.data.lon)
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(
                            poi_data.get('phone'))
                    if poi_data.get('email') is not None and poi_data.get('email') != '':
                        # BUGFIX: the email value was previously assigned to
                        # self.data.phone, clobbering the phone number.
                        self.data.email = poi_data.get('email').strip()
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Scrape the Aldi store-list HTML table and bulk-insert the POIs.

    The table rows carry (postcode, city, address); no opening hours,
    contact data or coordinates are available in this source.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    data = []
    insert_data = []
    if soup is not None:  # was `soup != None`; identity check is the Python idiom
        # parse the html using beautiful soap and store in variable `soup`
        table = soup.find('table',
                          attrs={'class': 'contenttable is-header-top'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            cols = [element.text.strip() for element in cols]
            data.append(cols)
        for poi_data in data:
            # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data[2])
            name = 'Aldi'
            code = 'hualdisup'
            postcode = poi_data[0].strip()
            city = clean_city(poi_data[1])
            # Fields the HTML table does not provide.
            branch = website = geom = ref = phone = email = None
            nonstop = None
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            original = poi_data[2]
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
                we_c, tu_c, fr_c, sa_c, su_c
            ])
        if len(insert_data) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def test_extract_street_housenumber_better_2(self):
    """Every fixture address must split into the expected street parts."""
    for case in self.addresses:
        parsed = extract_street_housenumber_better_2(case['original'])
        expected = (case['street'], case['housenumber'],
                    case['conscriptionnumber'])
        # One subTest per component so a single failure does not hide the rest.
        for want, got in zip(expected, parsed):
            with self.subTest():
                self.assertEqual(want, got)
def process(self):
    """Parse the OIL! fuel-station JSON feed and store each POI.

    Per-POI failures are logged and skipped; feed-level failures are
    logged and swallowed.
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            stations = json.loads(soup)
            for poi_data in stations:
                try:
                    def filled(field):
                        # Return the field value only when present and non-empty.
                        content = poi_data.get(field)
                        if content is not None and content != '':
                            return content
                        return None

                    self.data.name = 'OIL!'
                    self.data.code = 'huoilfu'
                    zip_value = filled('zip')
                    if zip_value is not None:
                        self.data.postcode = zip_value.strip()
                    city_value = filled('city')
                    if city_value is not None:
                        self.data.city = clean_city(city_value)
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    address_value = filled('address')
                    if address_value is not None:
                        self.data.original = address_value
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(address_value)
                    phone_value = filled('phone')
                    if phone_value is not None:
                        self.data.phone = clean_phone_to_str(phone_value)
                    # All OIL! stations sell 95-octane petrol and diesel.
                    self.data.fuel_octane_95 = True
                    self.data.fuel_diesel = True
                    id_value = filled('id')
                    if id_value is not None:
                        self.data.ref = id_value.strip()
                    url_value = filled('url')
                    if url_value is not None:
                        self.data.website = url_value.strip()
                    else:
                        self.data.website = 'https://www.oil-benzinkutak.hu'
                    store_value = filled('store')
                    if store_value is not None:
                        # Drop the leading word; the remainder is the branch name.
                        self.data.branch = store_value.split(
                            ' ', 1)[1].strip().capitalize()
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Parse the Yves Rocher store JSON feed and add Hungarian POIs.

    Only records with country_id == 3 (Hungary) are processed; everything
    else is skipped. Per-POI failures are logged and skipped.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache,
                                                 self.filename),
                                    self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text.get('list'):
                try:
                    # country_id 3 appears to identify Hungary in this feed —
                    # TODO confirm against the upstream API.
                    if poi_data.get('country_id') != 3:
                        continue
                    else:
                        self.data.name = 'Yves Rocher'
                        self.data.code = 'huyvesrcos'
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(poi_data.get(
                                'latitude'), poi_data.get('longitude'))
                        self.data.website = 'https://www.yves-rocher.hu{}/'.format(
                            poi_data.get('request_path'))
                        # Opening hours are keyed by weekday index 0..6.
                        opening = poi_data.get('hours')
                        for i in range(0, 7):
                            if i in opening:
                                self.data.day_open(
                                    i, opening[i]['hour_from'])
                                self.data.day_close(
                                    i, opening[i]['hour_to'])
                        self.data.postcode = poi_data.get('zip')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                poi_data.get('address'))
                        self.data.city = clean_city(poi_data.get('city'))
                        self.data.original = poi_data.get('address')
                        if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('phone'))
                        # Merge mobile into phone: append with ';' when a
                        # landline exists, otherwise use the mobile alone.
                        if poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is not None:
                            self.data.phone = '{};{}'.format(self.data.phone,
                                                             clean_phone_to_str(poi_data.get('mobile')))
                        elif poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is None:
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('mobile'))
                        self.data.public_holiday_open = False
                        self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Parse the MOL fuel-station JSON feed (POST request) and bulk-insert POIs."""
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename), POST_DATA)
    insert_data = []
    if soup is not None:  # was `soup != None`; identity check is the Python idiom
        text = json.loads(soup.get_text())
        for poi_data in text:
            name = 'MOL'
            code = 'humolfu'
            postcode = poi_data['postcode'].strip()
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            city = clean_city(poi_data['city'])
            # Fields this feed does not provide.
            branch = website = ref = phone = email = None
            nonstop = None
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            original = poi_data['address']
            lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
            geom = check_geom(lat, lon)
            # Prefer the OSM-derived postcode when configured to do so.
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
                we_c, tu_c, fr_c, sa_c, su_c
            ])
        if len(insert_data) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def process(self):
    """Parse the Pepco store JSON feed and add Hungarian POIs.

    The Pepco dataset contains all European data. Since the program cannot
    handle POIs outside Hungary (so far) this limits processing to Hungarian
    POIs only. In fact this depends on the OSM extract, but currently we use
    only the Hungarian OSM extract.
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text['data']:
                # Select only Hungarian POIs: skip records whose city is
                # missing, empty, or not resolvable in the OSM extract.
                # (Moved out of a per-iteration bare string literal that was
                # re-evaluated as a no-op statement on every loop pass.)
                if 'city' in poi_data and (
                        poi_data['city'] == '' or query_osm_city_name(
                            self.session, poi_data['city']) is None):
                    continue
                elif 'city' in poi_data:
                    self.data.city = clean_city(poi_data['city'])
                else:
                    continue
                self.data.name = 'Pepco'
                self.data.code = 'hupepcoclo'
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                self.data.lat, self.data.lon = \
                    check_hu_boundary(
                        poi_data['coordinates']['lat'],
                        poi_data['coordinates']['lng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(
                        poi_data.get('streetAddress'))
                self.data.original = poi_data.get('streetAddress')
                self.data.postcode = poi_data.get('postalCode')
                # self.data.city = query_osm_city_name_gpd(self.session, self.data.lat, self.data.lon)
                # Assign opening_hours (keyed by weekday index 0..6)
                opening = poi_data['openingHours']
                for i in range(0, 7):
                    if i in opening:
                        self.data.day_open(i, opening[i]['from'])
                        self.data.day_close(i, opening[i]['to'])
                # Assign additional informations
                self.data.phone = clean_phone_to_str(
                    poi_data.get('phoneNumber'))
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        # BUGFIX: was logging.error(logging.error(e)), which logged the inner
        # call's None return value instead of the exception itself.
        logging.error(e)
def process(self):
    """Parse the Kulcs patika pharmacy JSON (read from the local cache file).

    The cached file contains a JSON-encoded string that itself holds JSON,
    hence the double decode. Per-POI failures are logged and skipped.
    """
    try:
        if self.link:
            # soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache,
            # self.filename), self.post, self.verify_link, headers=self.headers)
            with open(os.path.join(self.download_cache, self.filename), 'r') as f:
                text = json.load(f)
            if text is not None:
                # The payload is double-encoded JSON; decode the inner document.
                text = json.loads(text, strict=False)
                for poi_data in text:
                    try:
                        # Titles not containing the chain name keep their own
                        # name; branded ones keep the title as the branch.
                        if 'Kulcs patika' not in poi_data.get('nev'):
                            self.data.name = poi_data.get('nev').strip()
                            self.data.branch = None
                        else:
                            self.data.name = 'Kulcs patika'
                            self.data.branch = poi_data.get('nev').strip()
                        self.data.code = 'hukulcspha'
                        if poi_data.get('link') is not None and poi_data.get('link') != '':
                            # Simplified: the surrounding guard already ensures
                            # link is not None, so the old trailing
                            # `... if ... is not None else None` was dead code.
                            self.data.website = poi_data.get('link').strip()
                        if poi_data.get('helyseg') is not None and poi_data.get('helyseg') != '':
                            self.data.city = clean_city(
                                poi_data.get('helyseg'))
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(poi_data.get('marker_position')['latitude'],
                                              poi_data.get('marker_position')['longitude'])
                        if poi_data.get('cim') is not None and poi_data.get('cim') != '':
                            self.data.original = poi_data.get('cim')
                            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                extract_street_housenumber_better_2(
                                    poi_data.get('cim'))
                        if poi_data.get('irsz') is not None and poi_data.get('irsz') != '':
                            self.data.postcode = poi_data.get('irsz').strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Parse the Magyar Posta JSON feed and insert post offices, PostaPont,
    cheque machines and parcel lockers as separate POI types."""
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename), self.filetype)
    if soup is not None:
        text = json.loads(soup)
        data = POIDataset()
        for poi_data in text['items']:
            if poi_data['type'] == 'posta':
                if 'mobilposta' in poi_data['name']:
                    data.name = 'Mobilposta'
                    data.code = 'hupostamp'
                else:
                    data.name = 'Posta'
                    data.code = 'hupostapo'
                data.public_holiday_open = False
            elif poi_data['type'] == 'csekkautomata':
                data.name = 'Posta csekkautomata'
                data.code = 'hupostacse'
                data.public_holiday_open = True
            elif poi_data['type'] == 'postamachine':
                data.name = 'Posta csomagautomata'
                data.code = 'hupostacso'
                data.public_holiday_open = True
            elif poi_data['type'] == 'postapoint':
                data.name = 'PostaPont'
                data.code = 'hupostapp'
                data.public_holiday_open = False
            else:
                logging.error('Non existing Posta type.')
                # BUGFIX: previously fell through and added the POI with the
                # previous iteration's (or unset) name/code.
                continue
            data.postcode = poi_data['zipCode'].strip()
            data.city = clean_city(poi_data['city'])
            data.branch = poi_data['name']
            data.lat = poi_data['lat']
            data.lon = poi_data['lng']
            data.street, data.housenumber, data.conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            data.original = poi_data['address']
            data.add()
        # NOTE: lenght() is the (misspelled) public API of POIDataset.
        if data is None or data.lenght() < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            insert_poi_dataframe(self.session, data.process())
def process(self):
    """Read the Mobiliti EV-charger CSV export and store each charging point.

    Rows without GPS coordinates are skipped. Any failure aborts the run
    (logged, not re-raised).
    """
    try:
        # Renamed from `csv` to avoid shadowing the stdlib module name.
        table = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
        if table is not None:
            for poi_data in table.to_dict('records'):
                self.data.name = 'Mobiliti'
                self.data.code = 'humobilchs'
                self.data.ref = poi_data.get('Mobiliti azonosító')
                self.data.branch = poi_data.get('Töltőpont neve')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(
                        poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                coordinates = poi_data.get('GPS koordináták')
                # Without coordinates the POI cannot be placed — skip it.
                if coordinates is None:
                    continue
                self.data.lat, self.data.lon = coordinates.split(',')
                self.data.lat, self.data.lon = check_hu_boundary(
                    self.data.lat, self.data.lon)
                # Socket counts and output power per connector type.
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get(
                    'Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get(
                    'Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get(
                    'Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get(
                    'Teljesítmény (Type 2)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Read the MOL Plugee EV-charger CSV export and store each charging point.

    Coordinates arrive with a decimal comma and are normalized to a decimal
    point before the boundary check. Any failure aborts the run (logged).
    """
    try:
        # Renamed from `csv` to avoid shadowing the stdlib module name.
        table = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
        if table is not None:
            for poi_data in table.to_dict('records'):
                self.data.name = 'MOL Plugee'
                self.data.code = 'humolplchs'
                self.data.ref = poi_data.get('Azonosító')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(
                        poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                raw_lat = poi_data.get('X')
                raw_lon = poi_data.get('Y')
                self.data.lat, self.data.lon = check_hu_boundary(
                    raw_lat.replace(',', '.'), raw_lon.replace(',', '.'))
                # Socket counts and output power per connector type.
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get(
                    'Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get(
                    'Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get(
                    'Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get(
                    'Teljesítmény (Type 2)')
                self.data.socket_type2 = poi_data.get(
                    'Darab (Type 2 – kábel nélkül)')
                self.data.socket_type2_output = poi_data.get(
                    'Teljesítmény (Type 2 – kábel nélkül)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the National Tobacco Shop JSON feed (POST request) and store POIs."""
    soup = save_downloaded_soup('{}'.format(self.link),
                                os.path.join(self.download_cache,
                                             self.filename),
                                self.filetype, POST_DATA)
    if soup is not None:
        shops = json.loads(soup)
        for poi_data in shops:
            self.data.name = 'Nemzeti Dohánybolt'
            self.data.code = 'hunemdotob'
            self.data.lat, self.data.lon = check_hu_boundary(
                poi_data['lat'], poi_data['lng'])
            self.data.postcode = poi_data.get('postcode').strip()
            self.data.city = clean_city(poi_data['city'])
            address = poi_data['address']
            self.data.original = address
            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                extract_street_housenumber_better_2(address)
            self.data.public_holiday_open = False
            self.data.add()
def process(self):
    """Parse the Foxpost parcel-locker JSON feed and store each POI.

    Opening hours are keyed by unaccented Hungarian day names; each day is
    either a time range (split by clean_opening_hours) or None (closed).
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text:
                self.data.name = 'Foxpost'
                self.data.code = 'hufoxpocso'
                self.data.postcode = poi_data['zip'].strip()
                self.data.city = clean_city(poi_data['city'])
                self.data.branch = poi_data['name']
                for i in range(0, 7):
                    # Hoisted: the day key was previously recomputed three
                    # times per day via WeekDaysLongHUUnAccented(i).name.lower().
                    day_key = WeekDaysLongHUUnAccented(i).name.lower()
                    if poi_data['open'][day_key] is not None:
                        opening, closing = clean_opening_hours(
                            poi_data['open'][day_key])
                        self.data.day_open(i, opening)
                        self.data.day_close(i, closing)
                    else:
                        self.data.day_open_close(i, None, None)
                self.data.original = poi_data['address']
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['geolat'], poi_data['geolng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                    poi_data['street'])
                self.data.public_holiday_open = False
                # Free-text hint on where to find the locker.
                self.data.description = poi_data.get('findme')
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the Mobil Petrol station data embedded in a JS variable.

    The 'services' field encodes opening hours as text tokens ('0-24',
    '6-22', ...). Nonstop stations are flagged; others get per-day hours.
    """
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache,
                                                 self.filename),
                                    self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            text = json.loads(
                extract_javascript_variable(soup, 'totem_stations'))
            for poi_data in text.values():
                self.data.name = 'Mobil Petrol'
                self.data.code = 'humobpefu'
                self.data.website = poi_data.get('description')
                self.data.city = clean_city(poi_data.get('city'))
                self.data.original = poi_data.get('address')
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['location']['lat'], poi_data['location']['lng'])
                self.data.postcode = None
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                    poi_data.get('address'))
                self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                self.data.public_holiday_open = False
                if '0-24' in poi_data.get('services'):
                    self.data.nonstop = True
                    self.data.public_holiday_open = True
                else:
                    # BUGFIX: reset per POI. The old code tested
                    # `'open_from' in locals()`, so hours set by a previous
                    # station leaked onto stations with no recognized token.
                    open_from = None
                    open_to = None
                    if '6-22' in poi_data.get('services'):
                        open_from = '06:00'
                        open_to = '22:00'
                    elif '6-21' in poi_data.get('services'):
                        open_from = '06:00'
                        open_to = '21:00'
                    elif '5-22' in poi_data.get('services'):
                        open_from = '05:00'
                        open_to = '22:00'
                    elif '6-18' in poi_data.get('services'):
                        open_from = '06:00'
                        open_to = '18:00'
                    if open_from is not None and open_to is not None:
                        for i in range(0, 7):
                            self.data.day_open(i, open_from)
                            self.data.day_close(i, open_to)
                    self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the Spar store JSON feed and store each POI.

    Classifies each store into a Spar sub-brand from its title, then extracts
    the numeric reference from the normalized name.
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            # (marker, name, code) checked in order; first match wins.
            brand_rules = (
                ('xpres', 'Spar Expressz', 'husparecon'),
                ('INTER', 'Interspar', 'husparisup'),
                ('market', 'Spar', 'husparsup'),
                ('DESPAR', 'DeSpar', 'huspardcon'),
            )
            for poi_data in text:
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                for marker, brand_name, brand_code in brand_rules:
                    if marker in poi_data['name']:
                        self.data.name = brand_name
                        self.data.code = brand_code
                        break
                else:
                    self.data.name = 'Spar'
                    self.data.code = 'husparsup'
                poi_data['name'] = poi_data['name'].replace(
                    'INTERSPAR', 'Interspar')
                poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
                ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
                self.data.ref = ref_match.group(
                    1).strip() if ref_match is not None else None
                self.data.city = clean_city(poi_data['city'])
                self.data.postcode = poi_data.get('zipCode').strip()
                self.data.branch = poi_data['name'].split('(')[0].strip()
                self.data.website = poi_data['pageUrl'].strip()
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['latitude'], poi_data['longitude'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['address'])
                self.data.original = poi_data['address']
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the Spar store JSON feed and bulk-insert the POIs (legacy path)."""
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:  # was `soup != None`; identity check is the Python idiom
        text = json.loads(soup.get_text())
        for poi_data in text:
            # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            # Classify the Spar sub-brand from the raw title.
            if 'xpres' in poi_data['name']:
                name = 'Spar Expressz'
                code = 'husparexp'
            elif 'INTER' in poi_data['name']:
                name = 'Interspar'
                code = 'husparint'
            elif 'market' in poi_data['name']:
                name = 'Spar'
                code = 'husparsup'
            else:
                name = 'Spar'
                code = 'husparsup'
            poi_data['name'] = poi_data['name'].replace(
                'INTERSPAR', 'Interspar')
            poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
            ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
            ref = ref_match.group(
                1).strip() if ref_match is not None else None
            city = clean_city(poi_data['city'])
            postcode = poi_data['zipCode'].strip()
            branch = poi_data['name'].split('(')[0].strip()
            website = poi_data['pageUrl'].strip()
            # No opening-hours or contact data in this feed.
            nonstop = None
            mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
            mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
            phone = email = None
            lat, lon = check_hu_boundary(poi_data['latitude'],
                                         poi_data['longitude'])
            geom = check_geom(lat, lon)
            # Prefer the OSM-derived postcode when configured to do so.
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = poi_data['address']
            insert_data.append([
                code, postcode, city, name, branch, website, original, street,
                housenumber, conscriptionnumber, ref, phone, email, geom,
                nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
                we_c, tu_c, fr_c, sa_c, su_c
            ])
        if len(insert_data) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def process(self):
    """Parse the Budapest Bank branch/ATM JSON feed and store each POI.

    Opening hours arrive as a '||'-separated list of entries like
    'H: 8:00-16:00' with Hungarian day prefixes (H/K/Sz/Cs/P).
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            # Hungarian day prefix -> field prefix, in the original elif order.
            day_fields = (('H:', 'mo'), ('K:', 'tu'), ('Sz:', 'we'),
                          ('Cs:', 'th'), ('P:', 'fr'))

            def _parse_range(entry, prefix):
                # Split 'H: 8:00-16:00' into (open, close); missing or empty
                # parts become None.
                parts = entry.replace(prefix, '').split('-')
                op = parts[0].strip()
                try:
                    cl = parts[1].strip()
                except IndexError:
                    cl = None
                # BUGFIX: the old code tested the *builtin* `open` instead of
                # the parsed value (`op if open is not None and ...`), which
                # was always truthy; test the actual values.
                op = op if op is not None and op != '' else None
                cl = cl if cl is not None and cl != '' else None
                return op, cl

            for poi_data in text['points']:
                if poi_data['fiok'] == 1:
                    self.data.name = 'Budapest Bank'
                    self.data.code = 'hubpbank'
                    self.data.public_holiday_open = False
                else:
                    self.data.name = 'Budapest Bank ATM'
                    self.data.code = 'hubpatm'
                    self.data.public_holiday_open = True
                self.data.postcode = poi_data['zip']
                self.data.city = poi_data['city_only']
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data['latitude'], poi_data['longitude'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['addr'])
                self.data.original = poi_data['address']
                self.data.branch = poi_data['name']
                # Processing opening hours (five duplicated per-day stanzas
                # collapsed into one loop; unused `oh = []` removed).
                if poi_data.get('opening') is not None:
                    self.data.nonstop = False
                    for entry in poi_data.get('opening').split('||'):
                        for prefix, day in day_fields:
                            if prefix in entry:
                                op, cl = _parse_range(entry, prefix)
                                setattr(self.data, '{}_o'.format(day), op)
                                setattr(self.data, '{}_c'.format(day), cl)
                                break
                # ATMs are treated as nonstop regardless of branch hours.
                if self.data.code == 'hubpatm':
                    self.data.nonstop = True
                else:
                    self.data.nonstop = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the Penny market JSON feed and store each POI."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text['markets']:
                address = poi_data['address']
                self.data.name = 'Penny'
                self.data.code = 'hupennysup'
                self.data.postcode = address['zip'].strip()
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    address['latitude'], address['longitude'])
                # Only the part before the first comma is the street proper.
                street_part = address['street'].split(',')[0]
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(street_part.title())
                if 'phone' in poi_data and poi_data['phone'] != '':
                    self.data.phone = clean_phone_to_str(poi_data['phone'])
                if 'id' in poi_data and poi_data['id'] != '':
                    self.data.ref = poi_data['id'].strip()
                self.data.public_holiday_open = False
                # TODO: Parsing opening_hours from datasource
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Parse the OBI store JSON feed and store each POI."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text.get('stores'):
                address = poi_data['address']
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                self.data.postcode = address['zip'].strip()
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    address['lat'], address['lon'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(address['street'])
                if 'phone' in poi_data and poi_data.get('phone') != '':
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('phone'))
                if 'storeNumber' in poi_data and poi_data.get('storeNumber') != '':
                    self.data.ref = poi_data.get('storeNumber').strip()
                if 'email' in poi_data and poi_data.get('email') != '':
                    self.data.email = clean_email(poi_data.get('email'))
                if 'path' in poi_data and poi_data.get('path') != '':
                    self.data.website = poi_data.get('path')
                # TODO: opening hour parser for poi_data.get('hours'), format is like:
                # Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Import CBA / Príma stores from the JS variable embedded in the page.

    The raw store list lives in a ``var boltok_nyers`` script tag; it is
    cleaned into plain JSON and every store becomes one POI dataframe row.

    Fixes over the original:
    * ``soup != None`` replaced with the identity check ``is not None``.
    * Regex pattern is now a raw string.
    * 14 copy-pasted opening/closing-hour assignments replaced by a loop
      (order preserved: ``PS_OPEN_FROM_1..7`` then ``PS_OPEN_TO_1..7``).
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:
        # Locate the script tag that holds the raw store list.
        pattern = re.compile(r'^\s*var\s*boltok_nyers.*')
        script = soup.find('script', text=pattern)
        m = pattern.match(script.get_text())
        data = clean_javascript_variable(m.group(0), 'boltok_nyers')
        text = json.loads(data)
        for poi_data in text:
            # Assign: code, postcode, city, name, branch, website, original,
            # street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data['A_CIM'])
            city = clean_city(poi_data['A_VAROS'])
            postcode = poi_data['A_IRSZ'].strip()
            branch = poi_data['P_NAME'].strip()
            name = 'Príma' if 'Príma' in branch else 'CBA'
            code = 'huprimacon' if 'Príma' in branch else 'hucbacon'
            website = None
            nonstop = None
            # Opening/closing hours for day 1..7; None when not provided.
            opens = []
            closes = []
            for day in range(1, 8):
                raw_open = poi_data['PS_OPEN_FROM_{}'.format(day)]
                raw_close = poi_data['PS_OPEN_TO_{}'.format(day)]
                opens.append(
                    clean_opening_hours_2(raw_open) if raw_open is not None else None)
                closes.append(
                    clean_opening_hours_2(raw_close) if raw_close is not None else None)
            original = poi_data['A_CIM']
            lat, lon = check_hu_boundary(poi_data['PS_GPS_COORDS_LAT'],
                                         poi_data['PS_GPS_COORDS_LNG'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            ref = None
            if 'PS_PUBLIC_TEL' in poi_data and poi_data['PS_PUBLIC_TEL'] != '':
                phone = clean_phone(poi_data['PS_PUBLIC_TEL'])
            else:
                phone = None
            if 'PS_PUBLIC_EMAIL' in poi_data and poi_data['PS_PUBLIC_EMAIL'] != '':
                email = poi_data['PS_PUBLIC_EMAIL']
            else:
                email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Import Magyar Posta service points (post offices, parcel machines, ...).

    Fixes over the original:
    * An unknown ``type`` value used to only log an error and then fall
      through to the row-building code with an undefined (or stale)
      ``name``/``code``; such records are now skipped with ``continue``.
    * ``soup != None`` replaced with ``is not None``.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:
        text = json.loads(soup.get_text())
        for poi_data in text['items']:
            # Map the Posta service type to a display name and POI code.
            if poi_data['type'] == 'posta':
                if 'mobilposta' in poi_data['name']:
                    name = 'Mobilposta'
                    code = 'hupostamp'
                else:
                    name = 'Posta'
                    code = 'hupostapo'
            elif poi_data['type'] == 'csekkautomata':
                name = 'Posta csekkautomata'
                code = 'hupostacse'
            elif poi_data['type'] == 'postamachine':
                name = 'Posta csomagautomata'
                code = 'hupostacso'
            elif poi_data['type'] == 'postapoint':
                name = 'PostaPont'
                code = 'hupostapp'
            else:
                logging.error('Non existing Posta type.')
                continue  # fix: do not emit a row with undefined name/code
            postcode = poi_data['zipCode'].strip()
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data['address'])
            city = clean_city(poi_data['city'])
            branch = poi_data['name']
            website = None
            nonstop = None
            # No opening-hours information in this data source.
            opens = [None] * 7
            closes = [None] * 7
            geom = check_geom(poi_data['lat'], poi_data['lng'])
            original = poi_data['address']
            ref = None
            phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Download the BENU pharmacy list (JSON) and add each pharmacy as a POI."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            pharmacies = json.loads(str(soup))
            for poi_data in pharmacies:
                try:
                    title = poi_data.get('title')
                    # Generic titles become the POI name; BENU-branded ones
                    # get a fixed name with the title kept as the branch.
                    if 'BENU Gyógyszertár' not in title:
                        self.data.name = title.strip()
                        self.data.branch = None
                    else:
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = title.strip()
                    self.data.code = 'hubenupha'
                    # The website URL is embedded in the free-text description.
                    description = poi_data.get('description')
                    if description is not None:
                        url_match = PATTERN_FULL_URL.match(description)
                        if url_match is not None:
                            self.data.website = url_match.group(0).strip()
                        else:
                            self.data.website = None
                    else:
                        self.data.website = None
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.postcode = poi_data.get('postal_code').strip()
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('street'))
                    self.data.original = poi_data.get('street')
                    if 'phone' in poi_data and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    else:
                        self.data.phone = None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    """Import Tesco stores scraped from the page's ``data-stores`` attribute.

    Fixes over the original:
    * ``soup != None`` replaced with ``is not None``.
    * 14 copy-pasted opening-hours index expressions replaced by a loop over
      the day keys in the original order ('1'..'6' = Mon..Sat, '0' = Sun).
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    insert_data = []
    if soup is not None:
        # The store list is a JSON blob stored in a `data-stores` attribute.
        script = soup.find(attrs={'data-stores': True})
        text = json.loads(script['data-stores'])
        for poi_data in text:
            # Assign: code, postcode, city, name, branch, website, original,
            # street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data['address'])
            city = clean_city(poi_data['city'])
            branch = poi_data['name']
            if 'xpres' in poi_data['name']:
                name = 'Tesco Expressz'
                code = 'hutescoexp'
            elif 'xtra' in poi_data['name']:
                name = 'Tesco Extra'
                code = 'hutescoext'
            else:
                name = 'Tesco'
                code = 'hutescosup'
            website = poi_data['url']
            nonstop = None
            # Keys '1'..'6' are Monday..Saturday, '0' is Sunday;
            # value is an [open, close] pair.
            opening = json.loads(poi_data['opening'])
            day_keys = ('1', '2', '3', '4', '5', '6', '0')
            opens = [opening[k][0] for k in day_keys]
            closes = [opening[k][1] for k in day_keys]
            lat, lon = check_hu_boundary(poi_data['gpslat'], poi_data['gpslng'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, None)
            original = poi_data['address']
            ref = None
            if 'phone' in poi_data and poi_data['phone'] != '':
                phone = clean_phone(poi_data['phone'])
            else:
                phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Import Rossmann drugstores from the ``var places`` JS variable.

    Fixes over the original:
    * ``soup != None`` replaced with ``is not None``.
    * Regex pattern is now a raw string.
    * Seven copy-pasted business-hours branches replaced by a loop over the
      day names in the original (Monday..Sunday) order.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename), None,
        self.verify_link)
    insert_data = []
    if soup is not None:
        # Locate the script tag that holds the raw place list.
        pattern = re.compile(r'^\s*var\s*places.*')
        script = soup.find('script', text=pattern)
        m = pattern.match(script.get_text())
        data = clean_javascript_variable(m.group(0), 'places')
        text = json.loads(data)
        for poi_data in text:
            # Only the first address entry is used.
            poi_data = poi_data['addresses'][0]
            # Assign: code, postcode, city, name, branch, website, original,
            # street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data['address'])
            name = 'Rossmann'
            code = 'hurossmche'
            city = clean_city(poi_data['city'])
            postcode = poi_data['zip'].strip()
            branch = None
            website = None
            nonstop = False
            # Opening/closing hour pairs for Monday..Sunday; None when closed.
            day_names = ('monday', 'tuesday', 'wednesday', 'thursday',
                         'friday', 'saturday', 'sunday')
            opens = []
            closes = []
            for day in day_names:
                hours = poi_data['business_hours'][day]
                if hours is not None:
                    day_open, day_close = clean_opening_hours(hours)
                else:
                    day_open, day_close = None, None
                opens.append(day_open)
                closes.append(day_close)
            lat, lon = check_hu_boundary(poi_data['position'][0],
                                         poi_data['position'][1])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            original = poi_data['address']
            ref = None
            phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Scrape the Tesco department-store HTML table and import rows as POIs.

    Fixes over the original:
    * The appended rows omitted the ``phone`` and ``email`` columns, so the
      row length did not match ``POI_COLS`` and ``df.columns = POI_COLS``
      raised. Both are now included (as ``None``) in the same position used
      by every other processor.
    * The Expressz-normalizing regex is compiled once, outside the loop.
    * ``!= None`` replaced with ``is not None``.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    data = []
    insert_data = []
    if soup is not None:
        # Extract raw rows from the store table.
        table = soup.find('table', attrs={'class': 'tescoce-table'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            link = cols[0].find('a').get(
                'href') if cols[0].find('a') is not None else []
            cols = [element.text.strip() for element in cols]
            cols[0] = cols[0].split('\n')[0]
            del cols[-1]
            del cols[-1]
            cols.append(link)
            data.append(cols)
        # Normalize 'express'/'expres' spellings to 'Expressz'.
        tesco_replace = re.compile('(expressz{0,1})', re.IGNORECASE)
        for poi_data in data:
            # Assign: code, postcode, city, name, branch, website, original,
            # street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data[3])
            poi_data[0] = tesco_replace.sub('Expressz', poi_data[0])
            if 'xpres' in poi_data[0]:
                name = 'Tesco Expressz'
                code = 'hutescoexp'
            elif 'xtra' in poi_data[0]:
                name = 'Tesco Extra'
                code = 'hutescoext'
            else:
                name = 'Tesco'
                code = 'hutescosup'
            poi_data[0] = poi_data[0].replace('TESCO', 'Tesco')
            poi_data[0] = poi_data[0].replace('Bp.', 'Budapest')
            postcode = poi_data[1].strip()
            city = clean_city(poi_data[2].split(',')[0])
            branch = poi_data[0]
            website = poi_data[4]
            nonstop = None
            # No opening hours, geometry or contact data in this source.
            opens = [None] * 7
            closes = [None] * 7
            original = poi_data[3]
            geom = None
            ref = None
            phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Download the CBA / Príma dataset (embedded JS variable) and add POIs."""
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            stores = json.loads(
                extract_javascript_variable(soup, 'boltok_nyers'))
            for poi_data in stores:
                # Assign: code, postcode, city, name, branch, website,
                # original, street, housenumber, conscriptionnumber, ref, geom
                self.data.city = clean_city(poi_data.get('A_VAROS'))
                self.data.postcode = poi_data.get('A_IRSZ').strip()
                self.data.branch = poi_data.get('P_NAME').strip()
                if 'Príma' in self.data.branch:
                    self.data.name = 'Príma'
                    self.data.code = 'huprimacon'
                else:
                    self.data.name = 'CBA'
                    self.data.code = 'hucbacon'
                # Opening/closing hours come in PS_OPEN_FROM_1..7 / PS_OPEN_TO_1..7.
                for day in range(7):
                    raw_open = poi_data.get('PS_OPEN_FROM_{}'.format(day + 1))
                    raw_close = poi_data.get('PS_OPEN_TO_{}'.format(day + 1))
                    self.data.day_open(
                        day,
                        clean_opening_hours_2(raw_open)
                        if raw_open is not None else None)
                    self.data.day_close(
                        day,
                        clean_opening_hours_2(raw_close)
                        if raw_close is not None else None)
                self.data.original = poi_data.get('A_CIM')
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data.get('PS_GPS_COORDS_LAT'),
                    poi_data.get('PS_GPS_COORDS_LNG'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                if 'PS_PUBLIC_TEL' in poi_data and poi_data.get('PS_PUBLIC_TEL') != '':
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('PS_PUBLIC_TEL'))
                else:
                    self.data.phone = None
                if 'PS_PUBLIC_EMAIL' in poi_data and poi_data.get('PS_PUBLIC_EMAIL') != '':
                    self.data.email = poi_data.get('PS_PUBLIC_EMAIL')
                else:
                    self.data.email = None
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    """Import Shell / Mobil Petrol fuel stations from a downloaded CSV.

    Fixes over the original:
    * Rows whose ``Brand`` is neither ``Shell`` nor ``Mobilpetrol`` used to
      leave ``name``/``code`` undefined (NameError on the first such row,
      stale values afterwards); they are now logged and skipped.
    * Misspelled local ``steet_tmp`` renamed.

    Note: ``== True`` on the ``24 Hour`` column is kept deliberately —
    pandas may yield numpy booleans or NaN, where identity checks misbehave.
    """
    csv = save_downloaded_pd(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename))
    if csv is not None:
        # Normalize missing values so later string/int operations are safe.
        csv[['Post code']] = csv[['Post code']].fillna('0000')
        csv[['Post code']] = csv[['Post code']].astype(int)
        csv[['Telephone']] = csv[['Telephone']].fillna('0')
        csv[['Telephone']] = csv[['Telephone']].astype(int)
        csv[['City']] = csv[['City']].fillna('')
        csv[['Name']] = csv[['Name']].fillna('')
        insert_data = []
        poi_dict = csv.to_dict('records')
        for poi_data in poi_dict:
            if poi_data['Brand'] == 'Shell':
                name = 'Shell'
                code = 'hushellfu'
            elif poi_data['Brand'] == 'Mobilpetrol':
                name = 'Mobil Petrol'
                code = 'humobpefu'
            else:
                # fix: previously fell through with undefined name/code
                logging.warning('Unknown brand: %s. Skipping row ...',
                                poi_data['Brand'])
                continue
            postcode = poi_data['Post code']
            # Capitalize every word of the address except the last two
            # (typically the street suffix and the house number).
            street_tmp = poi_data['Address'].lower().split()
            for i in range(0, len(street_tmp) - 2):
                street_tmp[i] = street_tmp[i].capitalize()
            street_tmp = ' '.join(street_tmp)
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(street_tmp)
            if poi_data['City'] != '':
                city = clean_city(poi_data['City'].title())
            elif poi_data['Name'] != '':
                city = clean_city(poi_data['Name'].title())
            else:
                city = None
            branch = poi_data['Name'].strip()
            website = None
            if poi_data['24 Hour'] == True:
                nonstop = True
                opens = [None] * 7
                closes = [None] * 7
            else:
                # Default daytime schedule for non-24h stations.
                nonstop = False
                opens = ['06:00'] * 7
                closes = ['22:00'] * 7
            original = poi_data['Address']
            ref = None
            lat, lon = check_hu_boundary(poi_data['GPS Latitude'],
                                         poi_data['GPS Longitude'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            if 'Telephone' in poi_data and poi_data['Telephone'] != '':
                phone = clean_phone(str(poi_data['Telephone']))
            else:
                phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
        if len(insert_data) < 1:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
def process(self):
    """Import OMV fuel stations from the downloaded JSON resultset.

    Fixes over the original:
    * ``soup != None`` replaced with ``is not None``.
    * 14 copy-pasted per-day assignments of the same open/close pair
      collapsed into two 7-element lists.
    """
    soup = save_downloaded_soup(
        '{}'.format(self.link),
        os.path.join(self.download_cache, self.filename), POST_DATA)
    insert_data = []
    if soup is not None:
        text = json.loads(soup.get_text())
        for poi_data in text['results']:
            name = 'OMV'
            code = 'huomvfu'
            postcode = poi_data['postcode'].strip()
            street, housenumber, conscriptionnumber = \
                extract_street_housenumber_better_2(poi_data['address_l'])
            city = clean_city(poi_data['town_l'])
            branch = None
            website = None
            nonstop = None
            # A 00:00-24:00 range means the station is open nonstop.
            if poi_data['open_hours'] is not None:
                oho, ohc = clean_opening_hours(poi_data['open_hours'])
                if oho == '00:00' and ohc == '24:00':
                    nonstop = True
                    oho, ohc = None, None
            else:
                oho, ohc = None, None
            # The source has one schedule for the whole week.
            opens = [oho] * 7
            closes = [ohc] * 7
            original = poi_data['address_l']
            ref = None
            lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            if 'telnr' in poi_data and poi_data['telnr'] != '':
                phone = clean_phone(poi_data['telnr'])
            else:
                phone = None
            email = None
            insert_data.append(
                [code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop] + opens + closes)
    if len(insert_data) < 1:
        logging.warning('Resultset is empty. Skipping ...')
    else:
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
def process(self):
    """Import Tesco (and rebranded S-Market) stores from embedded JSON.

    Fix over the original: ``self.data.ref`` was assigned twice — an
    unstripped ``goldid`` near the top and a guarded, stripped one near the
    end. It is now assigned once with the same net result.
    """
    try:
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text.get('stores'):
                try:
                    # Assign: code, postcode, city, name, branch, website,
                    # original, street, housenumber, conscriptionnumber,
                    # ref, geom
                    self.data.branch = poi_data.get('store_name')
                    goldid = poi_data.get('goldid')
                    if goldid is not None and goldid != '':
                        self.data.ref = goldid.strip()
                    else:
                        self.data.ref = goldid
                    self.data.website = 'https://tesco.hu/aruhazak/aruhaz/{}/'.format(
                        poi_data.get('urlname'))
                    # Opening hours: keys '1'..'6' are Monday..Saturday,
                    # '0' is Sunday; value is an [open, close] pair.
                    opening = json.loads(poi_data.get('opening'))
                    for i in range(0, 7):
                        ind = str(i + 1) if i != 6 else '0'
                        if ind in opening:
                            self.data.day_open(i, opening[ind][0])
                            self.data.day_close(i, opening[ind][1])
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('gpslat'), poi_data.get('gpslng'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('address'))
                    self.data.postcode = poi_data.get('zipcode').strip()
                    self.data.city = clean_city(
                        query_osm_city_name_gpd(self.session, self.data.lat,
                                                self.data.lon))
                    # Some stores around Győr/Sopron/Mosonmagyaróvár/Levél
                    # were rebranded to S-Market.
                    if 'xpres' in poi_data.get('name'):
                        if self.data.city not in [
                                'Győr', 'Sopron', 'Mosonmagyaróvár', 'Levél'
                        ]:
                            self.data.name = 'Tesco Expressz'
                            self.data.code = 'hutescoexp'
                        else:
                            self.data.name = 'S-Market'
                            self.data.code = 'husmrktexp'
                    elif 'xtra' in poi_data.get('name'):
                        self.data.name = 'Tesco Extra'
                        self.data.code = 'hutescoext'
                    else:
                        if self.data.city not in ['Levél']:
                            self.data.name = 'Tesco'
                            self.data.code = 'hutescosup'
                        else:
                            self.data.name = 'S-Market'
                            self.data.code = 'husmrktsup'
                    self.data.original = poi_data.get('address')
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(
                            poi_data.get('phone'))
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')