def process(self):
    # Download and parse the Yves Rocher store-list JSON and add every
    # Hungarian store to the POI dataset.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text.get('list'):
                try:
                    # country_id 3 appears to select Hungary — TODO confirm
                    # against the data source; all other countries are skipped.
                    if poi_data.get('country_id') != 3:
                        continue
                    else:
                        self.data.name = 'Yves Rocher'
                        self.data.code = 'huyvesrcos'
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(poi_data.get('latitude'), poi_data.get('longitude'))
                        self.data.website = 'https://www.yves-rocher.hu{}/'.format(poi_data.get('request_path'))
                        # Opening hours keyed by weekday index 0-6
                        opening = poi_data.get('hours')
                        for i in range(0, 7):
                            if i in opening:
                                self.data.day_open(i, opening[i]['hour_from'])
                                self.data.day_close(i, opening[i]['hour_to'])
                        self.data.postcode = poi_data.get('zip')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(poi_data.get('address'))
                        self.data.city = clean_city(poi_data.get('city'))
                        self.data.original = poi_data.get('address')
                        if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                            self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                        # If both a landline and a mobile number exist the mobile
                        # is appended with ';'; otherwise the mobile alone is used.
                        if poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is not None:
                            self.data.phone = '{};{}'.format(self.data.phone, clean_phone_to_str(poi_data.get('mobile')))
                        elif poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                and self.data.phone is None:
                            self.data.phone = clean_phone_to_str(poi_data.get('mobile'))
                        self.data.public_holiday_open = False
                        self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Download the Penny market JSON feed and register every market as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link),
                                    os.path.join(self.download_cache, self.filename),
                                    self.filetype)
        if soup is None:
            return
        for market in json.loads(soup)['markets']:
            address = market['address']
            self.data.name = 'Penny'
            self.data.code = 'hupennysup'
            self.data.postcode = address['zip'].strip()
            self.data.city = clean_city(address['city'])
            self.data.original = address['street']
            self.data.lat, self.data.lon = check_hu_boundary(address['latitude'],
                                                             address['longitude'])
            # Only the part before the first comma is the street itself
            raw_street = address['street'].split(',')[0]
            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                extract_street_housenumber_better_2(raw_street.title())
            if 'phone' in market and market['phone'] != '':
                self.data.phone = clean_phone_to_str(market['phone'])
            if 'id' in market and market['id'] != '':
                self.data.ref = market['id'].strip()
            self.data.public_holiday_open = False
            # TODO: Parsing opening_hours from datasource
            self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    # Download and parse the Tom Market store JSON and add each store as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text['stores']:
                try:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                    # conscriptionnumber, ref, geom
                    self.data.code = 'hutommacon'
                    # The third element of 'name' holds the store reference id
                    if poi_data.get('name')[2] is not None and poi_data.get('name')[2] != '':
                        self.data.ref = poi_data.get('name')[2]
                    if poi_data.get('website') is not None and poi_data.get('website') != '':
                        self.data.website = poi_data.get('website')
                    else:
                        self.data.website = 'https://tommarket.hu'
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('lat'), poi_data.get('long'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('address'))
                    if poi_data.get('zip') is not None and poi_data.get('zip') != '':
                        self.data.postcode = poi_data.get('zip')
                    self.data.original = poi_data.get('address')
                    if poi_data.get('settlement') is not None and poi_data.get('settlement') != '':
                        self.data.city = clean_city(poi_data.get('settlement'))
                    else:
                        # No settlement in the feed: resolve the city from coordinates via OSM
                        self.data.city = query_osm_city_name_gpd(self.session, self.data.lat, self.data.lon)
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    if poi_data.get('email') is not None and poi_data.get('email') != '':
                        # BUG FIX: the email address was previously assigned to
                        # self.data.phone, clobbering the phone number set above.
                        self.data.email = poi_data.get('email').strip()
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Download the OIL! fuel station JSON feed and add every station as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text:
                try:
                    self.data.name = 'OIL!'
                    self.data.code = 'huoilfu'
                    if poi_data.get('zip') is not None and poi_data.get('zip') != '':
                        self.data.postcode = poi_data.get('zip').strip()
                    if poi_data.get('city') is not None and poi_data.get('city') != '':
                        self.data.city = clean_city(poi_data.get('city'))
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('lat'), poi_data.get('lng'))
                    if poi_data.get('address') is not None and poi_data.get('address') != '':
                        self.data.original = poi_data.get('address')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(poi_data.get('address'))
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    # Fuel types are not in the feed; every station is marked as
                    # selling 95 octane petrol and diesel.
                    self.data.fuel_octane_95 = True
                    self.data.fuel_diesel = True
                    if poi_data.get('id') is not None and poi_data.get('id') != '':
                        self.data.ref = poi_data.get('id').strip()
                    if poi_data.get('url') is not None and poi_data.get('url') != '':
                        self.data.website = poi_data.get('url').strip()
                    else:
                        self.data.website = 'https://www.oil-benzinkutak.hu'
                    if poi_data.get('store') is not None and poi_data.get('store') != '':
                        # Drop the first word of the store name; the remainder is
                        # the branch name.
                        tmp = poi_data.get('store').split(' ', 1)
                        self.data.branch = tmp[1].strip().capitalize()
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Parse the CBA/Príma store list embedded in the page as the
    # 'boltok_nyers' JavaScript variable and add each store as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            text = json.loads(extract_javascript_variable(soup, 'boltok_nyers'))
            for poi_data in text:
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                self.data.city = clean_city(poi_data.get('A_VAROS'))
                self.data.postcode = poi_data.get('A_IRSZ').strip()
                self.data.branch = poi_data.get('P_NAME').strip()
                # Branch name decides whether this is a Príma or a plain CBA store
                self.data.name = 'Príma' if 'Príma' in self.data.branch else 'CBA'
                self.data.code = 'huprimacon' if 'Príma' in self.data.branch else 'hucbacon'
                # Opening-hour fields in the source are 1-based:
                # PS_OPEN_FROM_1 .. PS_OPEN_FROM_7 / PS_OPEN_TO_1 .. PS_OPEN_TO_7
                for i in range(0, 7):
                    self.data.day_open(i,
                                       clean_opening_hours_2(poi_data.get('PS_OPEN_FROM_{}'.format(i + 1)))
                                       if poi_data.get('PS_OPEN_FROM_{}'.format(i + 1)) is not None else None)
                    self.data.day_close(i,
                                        clean_opening_hours_2(poi_data.get('PS_OPEN_TO_{}'.format(i + 1)))
                                        if poi_data.get('PS_OPEN_TO_{}'.format(i + 1)) is not None else None)
                self.data.original = poi_data.get('A_CIM')
                self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('PS_GPS_COORDS_LAT'), poi_data.get('PS_GPS_COORDS_LNG'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                if 'PS_PUBLIC_TEL' in poi_data and poi_data.get('PS_PUBLIC_TEL') != '':
                    self.data.phone = clean_phone_to_str(poi_data.get('PS_PUBLIC_TEL'))
                else:
                    self.data.phone = None
                if 'PS_PUBLIC_EMAIL' in poi_data and poi_data.get('PS_PUBLIC_EMAIL') != '':
                    self.data.email = poi_data.get('PS_PUBLIC_EMAIL')
                else:
                    self.data.email = None
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    # Parse the Pepco store JSON and add Hungarian stores as POIs.
    #
    # The Pepco dataset contains all European stores. Since the matcher works
    # against a Hungarian OSM extract, only POIs whose city can be resolved by
    # query_osm_city_name() are kept.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text['data']:
                # Select only Hungarian POIs: skip entries with an empty or
                # unresolvable city, or with no city at all.
                if 'city' in poi_data and (poi_data['city'] == '' or query_osm_city_name(self.session, poi_data['city']) is None):
                    continue
                elif 'city' in poi_data:
                    self.data.city = clean_city(poi_data['city'])
                else:
                    continue
                self.data.name = 'Pepco'
                self.data.code = 'hupepcoclo'
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                self.data.lat, self.data.lon = \
                    check_hu_boundary(poi_data['coordinates']['lat'], poi_data['coordinates']['lng'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('streetAddress'))
                self.data.original = poi_data.get('streetAddress')
                self.data.postcode = poi_data.get('postalCode')
                # Opening hours keyed by weekday index 0-6
                opening = poi_data['openingHours']
                for i in range(0, 7):
                    if i in opening:
                        self.data.day_open(i, opening[i]['from'])
                        self.data.day_close(i, opening[i]['to'])
                self.data.phone = clean_phone_to_str(poi_data.get('phoneNumber'))
                self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        # BUG FIX: was logging.error(logging.error(e)) — the inner call returns
        # None, so the outer call always logged 'None' instead of the error.
        logging.error(e)
def process(self):
    # Parse Mobil Petrol stations from the 'totem_stations' JavaScript
    # variable. Opening hours are encoded as free text inside 'services'
    # (e.g. '0-24', '6-22'); only the known patterns are translated.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            text = json.loads(extract_javascript_variable(soup, 'totem_stations'))
            for poi_data in text.values():
                self.data.name = 'Mobil Petrol'
                self.data.code = 'humobpefu'
                self.data.website = poi_data.get('description')
                self.data.city = clean_city(poi_data.get('city'))
                self.data.original = poi_data.get('address')
                self.data.lat, self.data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                                 poi_data['location']['lng'])
                self.data.postcode = None
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('address'))
                self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                self.data.public_holiday_open = False
                if '0-24' in poi_data.get('services'):
                    self.data.nonstop = True
                    self.data.public_holiday_open = True
                else:
                    # BUG FIX: open_from/open_to used to be checked with
                    # "'open_from' in locals()", so once any earlier station set
                    # them, later stations with no recognized pattern silently
                    # inherited the stale opening hours. Reset them per station.
                    open_from, open_to = None, None
                    if '6-22' in poi_data.get('services'):
                        open_from, open_to = '06:00', '22:00'
                    elif '6-21' in poi_data.get('services'):
                        open_from, open_to = '06:00', '21:00'
                    elif '5-22' in poi_data.get('services'):
                        open_from, open_to = '05:00', '22:00'
                    elif '6-18' in poi_data.get('services'):
                        open_from, open_to = '06:00', '18:00'
                    if open_from is not None and open_to is not None:
                        # Same opening hours for all seven days
                        for i in range(0, 7):
                            self.data.day_open(i, open_from)
                            self.data.day_close(i, open_to)
                    self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def process(self):
    # Load K&H Bank branch / ATM locations from a local JSON file and insert
    # them into the database as a POIDataset.
    try:
        if self.link:
            with open(self.link, 'r') as f:
                text = json.load(f)
                data = POIDataset()
                for poi_data in text['results']:
                    # Each result is a one-entry mapping; its single key holds the POI payload
                    first_element = next(iter(poi_data))
                    if self.name == 'K&H Bank':
                        data.name = 'K&H Bank'
                        data.code = 'hukhbank'
                        data.public_holiday_open = False
                    elif self.name == 'K&H Bank ATM':
                        data.name = 'K&H Bank ATM'
                        data.code = 'hukhatm'
                        data.public_holiday_open = True
                    # ATMs are treated as non-stop; branches are not
                    if data.code == 'hukhatm':
                        data.nonstop = True
                    else:
                        data.nonstop = False
                    data.lat, data.lon = check_hu_boundary(poi_data.get(first_element)['latitude'],
                                                           poi_data.get(first_element)['longitude'])
                    if poi_data.get(first_element)['address'] is not None and \
                            poi_data.get(first_element)['address'] != '':
                        data.postcode, data.city, data.street, data.housenumber, data.conscriptionnumber = \
                            extract_all_address(poi_data.get(first_element)['address'])
                    data.original = poi_data.get(first_element)['address']
                    # NOTE(review): 'phoneNumber' is read from the top-level
                    # mapping while every other field lives under first_element
                    # — confirm against the data source that this is intended.
                    if poi_data.get('phoneNumber') is not None and poi_data.get('phoneNumber') != '':
                        data.phone = clean_phone_to_str(poi_data.get('phoneNumber'))
                    else:
                        data.phone = None
                    data.add()
                if data is None or data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
        logging.error(poi_data)
def process(self):
    # Load CIB Bank branch / ATM locations from a local JSON file and insert
    # them into the database as a POIDataset.
    try:
        if self.link:
            with open(self.link, 'r') as f:
                text = json.load(f)
                data = POIDataset()
                for poi_data in text['availableLocations']:
                    # Only locations currently in service are processed
                    if 'locationStatus' in poi_data and poi_data['locationStatus'] == 'IN_SERVICE':
                        # self.name distinguishes the branch scraper from the ATM scraper
                        if self.name == 'CIB Bank':
                            data.name = 'CIB Bank'
                            data.code = 'hucibbank'
                            data.public_holiday_open = False
                        else:
                            data.name = 'CIB Bank ATM'
                            data.code = 'hucibatm'
                            data.public_holiday_open = True
                        data.lat, data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                               poi_data['location']['lon'])
                        data.city = clean_city(poi_data['city'])
                        data.postcode = poi_data.get('zip').strip()
                        data.housenumber = poi_data['streetNo'].strip()
                        data.street = poi_data['streetName'].strip()
                        data.branch = poi_data['name']
                        if 'phone' in poi_data and poi_data['phone'] != '':
                            data.phone = clean_phone_to_str(poi_data['phone'])
                        if 'email' in poi_data and poi_data['email'] != '':
                            data.email = poi_data['email'].strip()
                        data.original = poi_data['fullAddress']
                        data.add()
                if data is None or data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
        logging.error(poi_data)
def process(self):
    # Parse the BENU pharmacy JSON and add each pharmacy as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(str(soup))
            for poi_data in text:
                try:
                    # Titles without 'BENU Gyógyszertár' are independently named
                    # pharmacies; otherwise the title is used as the branch name.
                    if 'BENU Gyógyszertár' not in poi_data.get('title'):
                        self.data.name = poi_data.get('title').strip()
                        self.data.branch = None
                    else:
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = poi_data.get('title').strip()
                    self.data.code = 'hubenupha'
                    # The website URL is extracted from the free-text description
                    if poi_data.get('description') is not None:
                        pu_match = PATTERN_FULL_URL.match(poi_data.get('description'))
                        self.data.website = pu_match.group(0).strip() if pu_match is not None else None
                    else:
                        self.data.website = None
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.postcode = poi_data.get('postal_code').strip()
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('lat'), poi_data.get('lng'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get(('street')))
                    self.data.original = poi_data.get('street')
                    if 'phone' in poi_data and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    else:
                        self.data.phone = None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Parse the OBI store JSON feed and add each store as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text.get('stores'):
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                self.data.postcode = poi_data['address']['zip'].strip()
                self.data.city = clean_city(poi_data['address']['city'])
                self.data.original = poi_data['address']['street']
                self.data.lat, self.data.lon = check_hu_boundary(poi_data['address']['lat'],
                                                                 poi_data['address']['lon'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                    poi_data['address']['street'])
                if 'phone' in poi_data and poi_data.get('phone') != '':
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                if 'storeNumber' in poi_data and poi_data.get('storeNumber') != '':
                    self.data.ref = poi_data.get('storeNumber').strip()
                if 'email' in poi_data and poi_data.get('email') != '':
                    self.data.email = clean_email(poi_data.get('email'))
                if 'path' in poi_data and poi_data.get('path') != '':
                    self.data.website = poi_data.get('path')
                # TODO: opening hour parser for poi_data.get('hours'), format is like:
                # Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)
def test_clean_phone(self):
    # Verify that clean_phone_to_str converts each sample phone number
    # to its expected canonical form.
    for case in self.phones:
        converted = clean_phone_to_str(case['original'])
        with self.subTest():
            self.assertEqual(case['converted'], converted)
def process(self):
    # Parse the Tesco store-list JSON and add each store as a POI. The store
    # name selects the sub-brand (Expressz / Extra / regular), and a few
    # stores near the Austrian border are branded S-Market instead.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            # script = soup.find('div', attrs={'data-stores':True})
            text = json.loads(str(soup))
            for poi_data in text.get('stores'):
                try:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                    # conscriptionnumber, ref, geom
                    self.data.branch = poi_data.get('store_name')
                    self.data.ref = poi_data.get('goldid')
                    self.data.website = 'https://tesco.hu/aruhazak/aruhaz/{}/'.format(poi_data.get('urlname'))
                    # Opening hours are keyed '1'..'6' for Monday..Saturday and '0' for Sunday
                    opening = json.loads(poi_data.get('opening'))
                    for i in range(0, 7):
                        ind = str(i + 1) if i != 6 else '0'
                        if ind in opening:
                            self.data.day_open(i, opening[ind][0])
                            self.data.day_close(i, opening[ind][1])
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data.get('gpslat'), poi_data.get('gpslng'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('address'))
                    self.data.postcode = poi_data.get('zipcode').strip()
                    # City is resolved from coordinates via OSM instead of the data source
                    self.data.city = clean_city(query_osm_city_name_gpd(self.session, self.data.lat, self.data.lon))
                    if 'xpres' in poi_data.get('name'):
                        if self.data.city not in ['Győr', 'Sopron', 'Mosonmagyaróvár', 'Levél']:
                            self.data.name = 'Tesco Expressz'
                            self.data.code = 'hutescoexp'
                        else:
                            self.data.name = 'S-Market'
                            self.data.code = 'husmrktexp'
                    elif 'xtra' in poi_data.get('name'):
                        self.data.name = 'Tesco Extra'
                        self.data.code = 'hutescoext'
                    else:
                        if self.data.city not in ['Levél']:
                            self.data.name = 'Tesco'
                            self.data.code = 'hutescosup'
                        else:
                            self.data.name = 'S-Market'
                            self.data.code = 'husmrktsup'
                    self.data.original = poi_data.get('address')
                    if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                        self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    # NOTE(review): ref was already set from goldid above; this
                    # second assignment only adds strip() — appears redundant.
                    if poi_data.get('goldid') is not None and poi_data.get('goldid') != '':
                        self.data.ref = poi_data.get('goldid').strip()
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Parse the dm drugstore JSON feed and add every Hungarian store as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            text = json.loads(soup)
            for poi_data in text['stores']:
                try:
                    # The feed covers several countries; keep only Hungarian stores
                    if poi_data.get('localeCountry').strip().upper() == 'HU':
                        self.data.name = 'dm'
                        self.data.code = 'hudmche'
                        self.data.postcode = poi_data.get('address')['zip'].strip()
                        # Only the part before the first comma is the street itself
                        street_tmp = poi_data.get('address')['street'].split(',')[0]
                        self.data.city = clean_city(poi_data.get('address')['city'])
                        self.data.website = 'https://www.dm.hu{}'.format(poi_data.get('storeUrlPath'))
                        self.data.original = poi_data.get('address')['street']
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(poi_data.get('location')['lat'], poi_data.get('location')['lon'])
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(street_tmp.title())
                        if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                            self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                        if poi_data.get('storeNumber') is not None and poi_data.get('storeNumber') != '':
                            self.data.ref = poi_data.get('storeNumber').strip()
                        # openingDays entries carry a 1-based weekDay plus time slices;
                        # only the first time slice per day is used
                        opening = poi_data.get('openingDays')
                        try:
                            for i, d in enumerate(opening):
                                if d.get('weekDay') is not None and 1 <= d.get('weekDay') <= 7:
                                    day = d.get('weekDay')
                                    self.data.day_open(day - 1, d.get('timeSlices')[0].get('opening'))
                                    self.data.day_close(day - 1, d.get('timeSlices')[0].get('closing'))
                        except (IndexError, KeyError):
                            logging.warning('Exception occurred during opening hours processing')
                        self.data.public_holiday_open = False
                        self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
def process(self):
    # Parse the Hungarian Post (Magyar Posta) XML feed and add every public
    # post office / parcel locker / PostaPont as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        for e in soup.findAll('post'):
            try:
                # If this is a closed post office, skip it
                # if e.get('ispostpoint') == '0':
                #     continue
                # The 'kirendeltség' post offices are not available to end users, so we remove them
                if 'okmányiroda' in e.find('name').get_text().lower() or \
                        'mol kirendeltség' in e.find('name').get_text().lower():
                    logging.debug('Skipping non public post office.')
                    continue
                else:
                    # Map data-source service point type codes to POI names/codes
                    if e.servicepointtype.get_text() == 'PM':
                        self.data.name = 'Posta'
                        self.data.code = 'hupostapo'
                        self.data.public_holiday_open = False
                    elif e.servicepointtype.get_text() == 'CS':
                        self.data.name = 'Posta csomagautomata'
                        self.data.code = 'hupostacso'
                        self.data.public_holiday_open = True
                    elif e.servicepointtype.get_text() == 'PP':
                        self.data.name = 'PostaPont'
                        self.data.code = 'hupostapp'
                        self.data.public_holiday_open = False
                    else:
                        logging.error('Non existing Posta type.')
                    self.data.postcode = e.get('zipcode')
                    # Drop any parenthesised suffix from the house number
                    self.data.housenumber = e.street.housenumber.get_text().split('(', 1)[0].strip() \
                        if e.street.housenumber is not None else None
                    self.data.conscriptionnumber = None
                    self.data.city = clean_city(e.city.get_text())
                    self.data.branch = e.find('name').get_text() if e.find('name') is not None else None
                    if self.data.code == 'hupostapo':
                        # Post offices are numbered; append '. számú' to the number
                        self.data.branch = re.sub(r"(\d{1,3})", r"\1. számú", self.data.branch)
                    days = e.findAll('days') if e.findAll('days') is not None else None
                    # Counts days whose hours look like a 0-24 opening
                    nonstop_num = 0
                    for d in days:
                        if len(d) != 0:
                            day_key = None
                            # Try to match day name in data source (day tag) with on of WeekDaysLongHU enum element
                            # Select day based on d.day matching
                            for rd in WeekDaysLongHU:
                                if rd.name == d.day.get_text():
                                    day_key = rd.value
                                    break
                            else:
                                day_key = None  # No day matching skip to next
                            # Skip days that are not exist at data provider's
                            if day_key is None:
                                logging.warning('Cannot find any opening hours information for day {}.'.format(rd.name))
                                continue
                            else:
                                # Extract from and to information
                                from1 = d.from1.get_text() if d.from1 is not None else None
                                to1 = d.to1.get_text() if d.to1 is not None else None
                                from2 = d.from2.get_text() if d.from2 is not None else None
                                to2 = d.to2.get_text() if d.to2 is not None else None
                                # Avoid duplicated values of opening and close
                                if from1 != from2 and to1 != to2:
                                    logging.debug('Opening hours in post office: %s: %s-%s; %s-%s.',
                                                  self.data.branch, from1, to1, from2, to2)
                                    self.data.day_open(day_key, from1)
                                    if from2 is None or to2 is None:
                                        # NOTE(review): closing with from1 looks
                                        # suspicious — to1 seems intended; confirm.
                                        self.data.day_close(day_key, from1)
                                        # Count opening hours with nonstop like settings
                                        if from1 in '0:00' and to1 in ['0:00', '23:59', '24:00']:
                                            nonstop_num += 1
                                    else:
                                        # Check on Wednesday if there is a lunch break
                                        # Only same lunch break is supported for every days
                                        if day_key == 3:
                                            self.data.lunch_break_start = to1
                                            self.data.lunch_break_stop = from2
                                        self.data.day_close(day_key, to2)
                                        # Count opening hours with nonstop like settings
                                        if from1 in '0:00' and to2 in ['0:00', '23:59', '24:00']:
                                            nonstop_num += 1
                                else:
                                    # It seems there are duplications in Posta data source
                                    # Remove duplicates
                                    logging.warning('Dulicated opening hours in post office: %s: %s-%s; %s-%s.',
                                                    self.data.branch, from1, to1, from2, to2)
                                    from2, to2 = None, None
                    # All times are open so it is non stop
                    if nonstop_num >= 7:
                        logging.debug('It is a non stop post office.')
                        self.data.nonstop = True
                    # Coordinates use a comma decimal separator in the feed
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(e.gpsdata.wgslat.get_text().replace(',', '.'),
                                          e.gpsdata.wgslon.get_text().replace(',', '.'))
                    # Get street name and type
                    street_tmp_1 = clean_street(e.street.find('name').get_text().strip()) \
                        if e.street.find('name') is not None else None
                    street_tmp_2 = clean_street_type(e.street.type.get_text().strip()) \
                        if e.street.type is not None else None
                    # Streets without types
                    if street_tmp_2 is None:
                        self.data.street = street_tmp_1
                        # Since there is no original address format we create one
                        if self.data.housenumber is not None:
                            self.data.original = '{} {}'.format(street_tmp_1, self.data.housenumber)
                        else:
                            self.data.original = '{}'.format(street_tmp_1)
                    # Street with types
                    elif street_tmp_1 is not None and street_tmp_2 is not None:
                        self.data.street = '{} {}'.format(street_tmp_1, street_tmp_2)
                        # Since there is no original address format we create one
                        if self.data.housenumber is not None:
                            self.data.original = '{} {} {}'.format(street_tmp_1, street_tmp_2, self.data.housenumber)
                        else:
                            self.data.original = '{} {}'.format(street_tmp_1, street_tmp_2)
                    else:
                        logging.error('Non handled state in street data processing!')
                    self.data.phone = clean_phone_to_str(e.phonearea.get_text()) \
                        if e.phonearea is not None else None
                    self.data.email = e.email.get_text().strip() if e.email is not None else None
                    self.data.add()
            except Exception as err:
                logging.error(err)
                logging.error(e)
                logging.exception('Exception occurred')
    except Exception as err:
        logging.exception('Exception occurred')
        logging.error(err)
def process(self):
    # Parse Avia fuel stations from the 'markers' JavaScript variable and add
    # each station, including its fuel and service flags, as a POI.
    try:
        soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), self.filetype)
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            text = json.loads(extract_javascript_variable(soup, 'markers', True), strict=False)
            for poi_data in text:
                self.data.name = 'Avia'
                self.data.code = 'huaviafu'
                if self.data.city is None:
                    # Fall back to the marker title when no city was set
                    self.data.city = poi_data['title']
                self.data.ref = poi_data['kutid'] if poi_data['kutid'] is not None and poi_data['kutid'] != '' \
                    else None
                self.data.lat, self.data.lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
                # 'cim' is the full address string; split it into components
                if poi_data['cim'] is not None and poi_data['cim'] != '':
                    self.data.postcode, self.data.city, self.data.street, self.data.housenumber, \
                        self.data.conscriptionnumber = extract_all_address(poi_data['cim'])
                self.data.website = '/toltoallomas/?id={}'.format(str(poi_data['kutid'])) \
                    if poi_data['kutid'] is not None and poi_data['kutid'] != '' else None
                self.data.original = poi_data['cim']
                if 'tel' in poi_data and poi_data['tel'] != '':
                    self.data.phone = clean_phone_to_str(poi_data['tel'])
                else:
                    self.data.phone = None
                if 'email' in poi_data and poi_data['email'] != '':
                    self.data.email = clean_email(poi_data['email'])
                else:
                    self.data.email = None
                self.data.public_holiday_open = False
                # '1' flags in the data source mark available fuels and services
                self.data.fuel_octane_95 = True if poi_data.get('b95') == '1' or poi_data.get('b95g') == '1' \
                    else False
                self.data.fuel_diesel = True if poi_data.get('dies') == '1' or poi_data.get('gdies') == '1' \
                    else False
                self.data.fuel_octane_98 = True if poi_data.get('b98') == '1' else False
                self.data.fuel_lpg = True if poi_data.get('lpg') == '1' else False
                self.data.fuel_e85 = True if poi_data.get('e85') == '1' else False
                self.data.rent_lpg_bottles = True if poi_data.get('pgaz') == '1' else False
                self.data.compressed_air = True if poi_data.get('komp') == '1' else False
                self.data.restaurant = True if poi_data.get('etterem') == '1' else False
                self.data.food = True if poi_data.get('bufe') == '1' else False
                self.data.truck = True if poi_data.get('kpark') == '1' else False
                self.data.add()
    except Exception as e:
        logging.exception('Exception occurred')
        logging.error(e)