Exemple #1
0
 def process(self):
     """Download the BENU pharmacy JSON feed and insert each POI row.

     Fetches the feed (cached on disk), parses one JSON object per
     pharmacy, normalises address/name fields and bulk-inserts the
     result via insert_poi_dataframe().
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text:
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['street'])
             # Titles that do not contain the chain name are standalone
             # pharmacy names; otherwise the title is the branch name.
             if 'BENU Gyógyszertár' not in poi_data['title']:
                 name = poi_data['title'].strip()
                 branch = None
             else:
                 name = 'Benu gyógyszertár'
                 branch = poi_data['title'].strip()
             code = 'hubenupha'
             website = poi_data['description'].strip() \
                 if poi_data['description'] is not None else None
             # BUGFIX: the slice used to run unconditionally and raised
             # TypeError when description was None. The slice drops a
             # fixed-length prefix from the URL field -- presumably a
             # boilerplate lead-in; TODO confirm against live feed data.
             if website is not None:
                 website = website[19:]
             nonstop = None
             # This feed carries no opening-hour data; leave all day
             # columns empty (th_*/tu_* naming follows POI_COLS order).
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             city = clean_city(poi_data['city'])
             postcode = poi_data['postal_code'].strip()
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             # Optionally override the feed postcode with the OSM one.
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['street']
             ref = None
             if 'phone' in poi_data and poi_data['phone'] != '':
                 phone = clean_phone(poi_data['phone'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
 def process(self):
     """Download the MOL Bubi bike-share XML and insert one row per dock.

     Every <place> element becomes a nonstop Budapest POI; the station
     reference and branch name are split out of the "name" attribute.
     """
     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for e in root.iter('place'):
         name = 'MOL Bubi'
         code = 'hububibir'
         housenumber = None
         conscriptionnumber = None
         street = None
         city = 'Budapest'
         # The "name" attribute looks like "<ref>-<branch name>".
         branch = e.attrib['name'].split(
             '-')[1].strip() if e.attrib['name'] is not None else None
         # BUGFIX: ref was parsed here but then unconditionally reset to
         # None further down, silently discarding the station reference
         # (the newer variant of this importer keeps it -- see the
         # self.data-based process()).
         ref = e.attrib['name'].split(
             '-')[0].strip() if e.attrib['name'] is not None else None
         website = None
         # Bike docks are always accessible.
         nonstop = True
         mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
         mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
         # Coordinates use a decimal comma in this feed.
         lat, lon = check_hu_boundary(e.attrib['lat'].replace(',', '.'),
                                      e.attrib['lng'].replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                None)
         original = None
         phone = None
         email = None
         insert_data.append([
             code, postcode, city, name, branch, website, original, street,
             housenumber, conscriptionnumber, ref, phone, email, geom,
             nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
             we_c, tu_c, fr_c, sa_c, su_c
         ])
     # NOTE: removed a leftover debug print(insert_data) and the unused
     # "capacity" local (bike_racks is not part of the insert columns).
     if len(insert_data) < 1:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
 def process(self):
     """Load the locally stored CIB bank/ATM JSON file and insert its POIs."""
     if not self.link:
         return
     with open(self.link, 'r') as f:
         insert_data = []
         text = json.load(f)
         for poi_data in text['results']:
             # Each result is a one-key dict; the key wraps the payload.
             entry_key = next(iter(poi_data))
             entry = poi_data[entry_key]
             # Branch vs ATM is decided by the importer's configured name.
             if self.name == 'CIB bank':
                 name, code = 'CIB bank', 'hucibbank'
             else:
                 name, code = 'CIB', 'hucibatm'
             postcode, city, street, housenumber, conscriptionnumber = extract_all_address(
                 entry['address'])
             branch = None
             website = None
             nonstop = None
             # No opening-hour data in this dataset.
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             lat, lon = check_hu_boundary(entry['latitude'],
                                          entry['longitude'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon,
                 postcode)
             original = entry['address']
             ref = None
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone,
                 email, geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o,
                 sa_o, su_o, mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
Exemple #4
0
 def process(self):
     """Download the MOL fuel-station JSON (POST request) and insert POIs."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     insert_data = []
     # Idiom fix: identity check instead of "!= None".
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text:
             name = 'MOL'
             code = 'humolfu'
             postcode = poi_data['postcode'].strip()
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             city = clean_city(poi_data['city'])
             branch = None
             website = None
             nonstop = None
             # No opening-hour data in this feed.
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data['address']
             ref = None
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
    def process(self):
        """Download the MOL Bubi station list and register each dock via self.data.

        Newer importer style: per-station errors are logged and skipped so
        one malformed <place> does not abort the whole import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            for pla in soup.findAll('place'):
                try:
                    self.data.name = 'MOL Bubi'
                    self.data.code = 'hububibir'
                    self.data.city = 'Budapest'
                    if pla.get('name') is not None and pla.get('name') != '':
                        # "name" looks like "<ref>-<branch>"; the redundant
                        # inner "is not None" ternaries were dropped because
                        # the surrounding if already guarantees it.
                        self.data.branch = pla.get('name').split('-')[1].strip()
                        self.data.ref = pla.get('name').split('-')[0].strip()
                    self.data.nonstop = True
                    # Feed uses a decimal comma for coordinates.
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(pla.get('lat').replace(',', '.'),
                                          pla.get('lng').replace(',', '.'))
                    # BUGFIX: lat/lon were passed in swapped order; every
                    # other query_postcode_osm_external() call site in this
                    # file passes (lat, lon).
                    self.data.postcode = query_postcode_osm_external(
                        True, self.session, self.data.lat, self.data.lon, None)
                    self.data.public_holiday_open = True
                    self.data.add()
                except Exception as e:
                    # Best-effort: log the offending element and carry on.
                    logging.error(e)
                    logging.error(pla)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')

            logging.error(soup)
 def process(self):
     """Download the OMV fuel-station JSON (POST request) and insert POIs.

     A "00:00"-"24:00" opening range is normalised to nonstop=True with
     empty per-day columns; otherwise the same open/close pair is applied
     to every day of the week.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     insert_data = []
     # Idiom fix: identity check instead of "!= None".
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text['results']:
             name = 'OMV'
             code = 'huomvfu'
             postcode = poi_data['postcode'].strip()
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address_l'])
             city = clean_city(poi_data['town_l'])
             branch = None
             website = None
             nonstop = None
             if poi_data['open_hours'] is not None:
                 oho, ohc = clean_opening_hours(poi_data['open_hours'])
                 if oho == '00:00' and ohc == '24:00':
                     # Round-the-clock: flag nonstop, clear the day columns.
                     nonstop = True
                     oho, ohc = None, None
             else:
                 oho, ohc = None, None
             # Same hours every day (th_*/tu_* naming follows POI_COLS order).
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = oho
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = ohc
             original = poi_data['address_l']
             ref = None
             # Feed stores latitude in 'y' and longitude in 'x'.
             lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             if 'telnr' in poi_data and poi_data['telnr'] != '':
                 phone = clean_phone(poi_data['telnr'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
Exemple #7
0
 def process(self):
     """Download the Hungarian Post XML and insert each service point.

     ServicePointType selects the POI code: PM = post office,
     CS = parcel locker, PP = PostaPont partner point.
     """
     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for e in root.findall('post'):
         sp_type = e.find('ServicePointType').text
         if sp_type == 'PM':
             name = 'Posta'
             code = 'hupostapo'
         elif sp_type == 'CS':
             name = 'Posta csomagautomata'
             code = 'hupostacso'
         elif sp_type == 'PP':
             name = 'PostaPont'
             code = 'hupostapp'
         else:
             # BUGFIX: an unknown type previously fell through and reused
             # the previous iteration's name/code (or raised NameError on
             # the first element); skip such records instead.
             logging.error('Non existing Posta type.')
             continue
         postcode = e.get('zipCode')
         street_tmp_1 = e.find('street/name').text.strip() if e.find(
             'street/name').text is not None else None
         street_tmp_2 = e.find('street/type').text.strip() if e.find(
             'street/type').text is not None else None
         if street_tmp_1 is None:
             street = None
         elif street_tmp_2 is None:
             street = street_tmp_1
         else:
             # Both parts present: "<name> <type>", e.g. "Kossuth utca".
             # (The previous final else branch was unreachable.)
             street = '{} {}'.format(street_tmp_1, street_tmp_2)
         housenumber = e.find('street/houseNumber').text.strip().lower(
         ) if e.find('street/houseNumber').text is not None else None
         conscriptionnumber = None
         city = clean_city(e.find('city').text)
         branch = e.find('name').text if e.find(
             'name').text is not None else None
         website = None
         nonstop = None
         # No opening-hour data in this feed.
         mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
         mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
         # Coordinates use a decimal comma.
         lat, lon = check_hu_boundary(
             e.find('gpsData/WGSLat').text.replace(',', '.'),
             e.find('gpsData/WGSLon').text.replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                postcode)
         original = None
         ref = None
         phone = None
         email = None
         insert_data.append([
             code, postcode, city, name, branch, website, original, street,
             housenumber, conscriptionnumber, ref, phone, email, geom,
             nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c,
             we_c, tu_c, fr_c, sa_c, su_c
         ])
     if len(insert_data) < 1:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
Exemple #8
0
 def process(self):
     """Scrape the Rossmann store locator and insert each drugstore POI.

     The store data is embedded in the page as a JavaScript
     "var places = ..." assignment inside a <script> tag; it is extracted
     with a regex and parsed as JSON.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), None,
                                 self.verify_link)
     insert_data = []
     if soup is not None:
         # Raw string: '\s' in a plain literal is an invalid escape
         # (SyntaxWarning on modern Python).
         pattern = re.compile(r'^\s*var\s*places.*')
         script = soup.find('script', text=pattern)
         if script is None:
             # Robustness: previously crashed with AttributeError when the
             # page layout changed and the script tag was missing.
             logging.warning('Could not locate the places script tag. Skipping ...')
             return
         m = pattern.match(script.get_text())
         data = m.group(0)
         data = clean_javascript_variable(data, 'places')
         text = json.loads(data)

         def _hours(value):
             # (open, close) pair, or (None, None) when the day is unset.
             return clean_opening_hours(value) if value is not None else (None, None)

         for poi_data in text:
             poi_data = poi_data['addresses'][0]
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             name = 'Rossmann'
             code = 'hurossmche'
             city = clean_city(poi_data['city'])
             postcode = poi_data['zip'].strip()
             branch = None
             website = None
             nonstop = False
             bh = poi_data['business_hours']
             # NOTE(review): th_* holds Tuesday and tu_* holds Thursday --
             # kept as-is because it matches the POI_COLS column order used
             # throughout this file; confirm before "fixing".
             mo_o, mo_c = _hours(bh['monday'])
             th_o, th_c = _hours(bh['tuesday'])
             we_o, we_c = _hours(bh['wednesday'])
             tu_o, tu_c = _hours(bh['thursday'])
             fr_o, fr_c = _hours(bh['friday'])
             sa_o, sa_c = _hours(bh['saturday'])
             su_o, su_c = _hours(bh['sunday'])
             lat, lon = check_hu_boundary(poi_data['position'][0], poi_data['position'][1])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['address']
             ref = None
             phone = None
             email = None
             insert_data.append(
                 [code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber,
                  ref, phone, email, geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o, mo_c, th_c, we_c, tu_c,
                  fr_c, sa_c, su_c])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
 def process(self):
     """Download the Spar store JSON feed and insert each store POI."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is None:
         return
     stores = json.loads(soup.get_text())
     for poi_data in stores:
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         # Classify the store from its raw name. The 'market' and fallback
         # branches of the original produced identical values, so they are
         # folded together here.
         raw_name = poi_data['name']
         if 'xpres' in raw_name:
             name, code = 'Spar Expressz', 'husparexp'
         elif 'INTER' in raw_name:
             name, code = 'Interspar', 'husparint'
         else:
             name, code = 'Spar', 'husparsup'
         # Normalise the brand casing before extracting ref and branch.
         poi_data['name'] = poi_data['name'].replace(
             'INTERSPAR', 'Interspar')
         poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
         ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
         ref = None if ref_match is None else ref_match.group(1).strip()
         city = clean_city(poi_data['city'])
         postcode = poi_data['zipCode'].strip()
         branch = poi_data['name'].split('(')[0].strip()
         website = poi_data['pageUrl'].strip()
         nonstop = None
         # No opening-hour data in this feed.
         mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
         mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
         lat, lon = check_hu_boundary(poi_data['latitude'],
                                      poi_data['longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         phone = None
         email = None
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
             mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
         ])
     if len(insert_data) < 1:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
 def process(self):
     """Download the Shell/Mobil Petrol CSV dataset and insert fuel stations.

     Non-24h stations get a fixed 06:00-22:00 schedule for every day;
     24h stations are flagged nonstop with empty per-day columns.
     """
     csv = save_downloaded_pd(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if csv is not None:
         # Normalise missing values before the integer conversions below.
         csv[['Post code']] = csv[['Post code']].fillna('0000')
         csv[['Post code']] = csv[['Post code']].astype(int)
         csv[['Telephone']] = csv[['Telephone']].fillna('0')
         csv[['Telephone']] = csv[['Telephone']].astype(int)
         csv[['City']] = csv[['City']].fillna('')
         csv[['Name']] = csv[['Name']].fillna('')
         insert_data = []
         poi_dict = csv.to_dict('records')
         for poi_data in poi_dict:
             if poi_data['Brand'] == 'Shell':
                 name = 'Shell'
                 code = 'hushellfu'
             elif poi_data['Brand'] == 'Mobilpetrol':
                 name = 'Mobil Petrol'
                 code = 'humobpefu'
             else:
                 # BUGFIX: an unknown brand previously reused the previous
                 # row's name/code (or raised NameError on the first row).
                 logging.warning('Unknown fuel station brand: %s. Skipping ...',
                                 poi_data['Brand'])
                 continue
             postcode = poi_data['Post code']
             # Title-case all but the last two tokens of the address --
             # presumably so house-number suffixes stay lowercase; TODO
             # confirm that skipping exactly two tokens is intentional.
             steet_tmp = poi_data['Address'].lower().split()
             for i in range(0, len(steet_tmp) - 2):
                 steet_tmp[i] = steet_tmp[i].capitalize()
             steet_tmp = ' '.join(steet_tmp)
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 steet_tmp)
             # Fall back to the station name when the city column is blank.
             if poi_data['City'] != '':
                 city = clean_city(poi_data['City'].title())
             elif poi_data['Name'] != '':
                 city = clean_city(poi_data['Name'].title())
             else:
                 city = None
             branch = poi_data['Name'].strip()
             website = None
             # Keep "== True": the CSV column may hold NaN (truthy as a
             # float!) or numpy bools, so plain truthiness or "is True"
             # would misclassify rows.
             if poi_data['24 Hour'] == True:
                 nonstop = True
                 mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
                 mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             else:
                 nonstop = False
                 mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = '06:00'
                 mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = '22:00'
             original = poi_data['Address']
             ref = None
             lat, lon = check_hu_boundary(poi_data['GPS Latitude'],
                                          poi_data['GPS Longitude'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             if 'Telephone' in poi_data and poi_data['Telephone'] != '':
                 phone = clean_phone(str(poi_data['Telephone']))
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
def online_poi_matching(args):
    """Match candidate POI rows against existing OSM objects and enrich them.

    ``args`` is a ``(data, comm_data)`` tuple: ``data`` is a pandas DataFrame
    of POI candidates (mutated in place via ``data.at``) and ``comm_data``
    holds common POI-type metadata keyed by ``pc_id``.  For every row the
    function either finds a nearby OSM POI of the same type (copying its id,
    node type, coordinates, version, changeset, timestamp and live tags, with
    a local cache to avoid repeated OSM API calls) or marks the row as a new
    POI and tries to relocate it into a matching building.  Returns the
    enriched ``data``; on a top-level failure the error is logged and the
    function implicitly returns None.
    """
    data, comm_data = args
    try:
        # Build a dedicated writer DB connection and session for this worker.
        db = POIBase('{}://{}:{}@{}:{}/{}'.format(
            config.get_database_type(), config.get_database_writer_username(),
            config.get_database_writer_password(),
            config.get_database_writer_host(),
            config.get_database_writer_port(),
            config.get_database_poi_database()))
        pgsql_pool = db.pool
        session_factory = sessionmaker(pgsql_pool)
        Session = scoped_session(session_factory)
        session = Session()
        osm_live_query = OsmApi()
        for i, row in data.iterrows():
            # for i, row in data[data['poi_code'].str.contains('posta')].iterrows():
            try:
                # Try to search OSM POI with same type, and name contains poi_search_name within the specified distance
                osm_query = db.query_osm_shop_poi_gpd(
                    row.get('poi_lon'), row.get('poi_lat'),
                    comm_data.loc[comm_data['pc_id'] == row.get(
                        'poi_common_id')]['poi_type'].values[0],
                    row.get('poi_search_name'),
                    row.get('poi_search_avoid_name'),
                    row.get('poi_addr_street'),
                    row.get('poi_addr_housenumber'),
                    row.get('poi_conscriptionnumber'), row.get('poi_city'),
                    row.get('osm_search_distance_perfect'),
                    row.get('osm_search_distance_safe'),
                    row.get('osm_search_distance_unsafe'))
                # Enrich our data with OSM database POI metadata
                if osm_query is not None:
                    row['poi_new'] = False
                    # Collect additional OSM metadata. Note: this needs style change during osm2pgsql
                    osm_id = osm_query['osm_id'].values[0] if osm_query.get(
                        'osm_id') is not None else None
                    osm_node = osm_query.get(
                        'node').values[0] if osm_query.get(
                            'node') is not None else None
                    # Set OSM POI coordinates for all kind of geom
                    lat = osm_query.get('lat').values[0]
                    lon = osm_query.get('lon').values[0]
                    if data.at[i,
                               'poi_lat'] != lat and data.at[i,
                                                             'poi_lon'] != lon:
                        logging.info(
                            'Using new coodinates %s %s instead of %s %s.',
                            lat, lon, data.at[i, 'poi_lat'],
                            data.at[i, 'poi_lon'])
                        data.at[i, 'poi_lat'] = lat
                        data.at[i, 'poi_lon'] = lon
                    # Map the textual OSM object type onto the enum used below.
                    if osm_node == 'node':
                        osm_node = OSM_object_type.node
                    elif osm_node == 'way':
                        osm_node = OSM_object_type.way
                    elif osm_node == 'relation':
                        osm_node = OSM_object_type.relation
                    else:
                        logging.warning('Illegal state: %s',
                                        osm_query['node'].values[0])
                    data.at[i, 'osm_id'] = osm_id
                    data.at[i, 'osm_node'] = osm_node
                    # Refine postcode
                    if row['preserve_original_post_code'] is not True:
                        # Current OSM postcode based on lat,long query.
                        postcode = query_postcode_osm_external(
                            config.get_geo_prefer_osm_postcode(), session, lon,
                            lat, row.get('poi_postcode'))
                        force_postcode_change = False  # TODO: Has to be a setting in app.conf
                        if force_postcode_change is True:
                            # Force to use datasource postcode
                            if postcode != row.get('poi_postcode'):
                                logging.info(
                                    'Changing postcode from %s to %s.',
                                    row.get('poi_postcode'), postcode)
                                data.at[i, 'poi_postcode'] = postcode
                        else:
                            # Try to use smart method for postcode check
                            # NOTE(review): variable name is a typo for
                            # "ch_postcode" - consider renaming.
                            ch_posctode = smart_postcode_check(
                                row, osm_query, postcode)
                            if ch_posctode is not None:
                                data.at[i, 'poi_postcode'] = ch_posctode
                    else:
                        logging.info('Preserving original postcode %s',
                                     row.get('poi_postcode'))
                    # NOTE(review): `osm_query['osm_version'] is not None`
                    # tests the column object, not the cell value - if
                    # osm_query is a DataFrame this condition is always True;
                    # same for 'osm_changeset' and 'osm_timestamp' below.
                    # Confirm whether `.values[0] is not None` was intended.
                    data.at[i, 'osm_version'] = osm_query['osm_version'].values[0] \
                        if osm_query['osm_version'] is not None else None
                    data.at[i, 'osm_changeset'] = osm_query['osm_changeset'].values[0] \
                        if osm_query['osm_changeset'] is not None else None
                    if osm_query['osm_timestamp'] is not None:
                        # NOTE(review): this chained assignment also rewrites
                        # osm_query['osm_timestamp'] itself with the parsed
                        # value - verify that mutating osm_query is intended.
                        osm_query['osm_timestamp'] = \
                            data.at[i, 'osm_timestamp'] = pd.to_datetime(str((osm_query['osm_timestamp'].values[0])))
                    else:
                        osm_query['osm_timestamp'] = None
                    data.at[i, 'poi_distance'] = osm_query.get(
                        'distance').values[0] if osm_query.get(
                            'distance') is not None else None
                    # For OSM way also query node points
                    if osm_node == OSM_object_type.way:
                        logging.info(
                            'This is an OSM way looking for id %s nodes.',
                            osm_id)
                        # Add list of nodes to the dataframe
                        nodes = db.query_ways_nodes(osm_id)
                        data.at[i, 'osm_nodes'] = nodes
                    elif osm_node == OSM_object_type.relation:
                        logging.info(
                            'This is an OSM relation looking for id %s nodes.',
                            osm_id)
                        # Add list of relation nodes to the dataframe
                        nodes = db.query_relation_nodes(osm_id)
                        data.at[i, 'osm_nodes'] = nodes
                    logging.info(
                        'Old %s (not %s) type: %s POI within %s m: %s %s, %s %s (%s)',
                        data.at[i, 'poi_search_name'],
                        data.at[i,
                                'poi_search_avoid_name'], data.at[i,
                                                                  'poi_type'],
                        data.at[i, 'poi_distance'], data.at[i, 'poi_postcode'],
                        data.at[i, 'poi_city'], data.at[i, 'poi_addr_street'],
                        data.at[i, 'poi_addr_housenumber'],
                        data.at[i, 'poi_conscriptionnumber'])
                    try:
                        # Download OSM POI way live tags
                        if osm_node == OSM_object_type.way:
                            for rtc in range(0, RETRY):
                                logging.info(
                                    'Downloading OSM live tags to this way: %s.',
                                    osm_id)
                                # Check the local cache before hitting the API.
                                cached_way = db.query_from_cache(
                                    osm_id, osm_node)
                                if cached_way is None:
                                    live_tags_container = osm_live_query.WayGet(
                                        osm_id)
                                    if live_tags_container is not None:
                                        data.at[
                                            i,
                                            'osm_live_tags'] = live_tags_container.get(
                                                'tag')
                                        cache_row = {
                                            'osm_id':
                                            int(osm_id),
                                            'osm_live_tags':
                                            live_tags_container.get('tag'),
                                            'osm_version':
                                            live_tags_container.get('version'),
                                            'osm_user':
                                            live_tags_container.get('user'),
                                            'osm_user_id':
                                            live_tags_container.get('uid'),
                                            'osm_changeset':
                                            live_tags_container.get(
                                                'changeset'),
                                            'osm_timestamp':
                                            live_tags_container.get(
                                                'timestamp'),
                                            'osm_object_type':
                                            osm_node,
                                            'osm_lat':
                                            None,
                                            'osm_lon':
                                            None,
                                            'osm_nodes':
                                            live_tags_container.get('nd')
                                        }
                                        get_or_create_cache(
                                            session, POI_OSM_cache,
                                            **cache_row)
                                        # Downloading referenced nodes of the way
                                        for way_nodes in live_tags_container[
                                                'nd']:
                                            logging.debug(
                                                'Getting node %s belongs to way %s',
                                                way_nodes, osm_id)
                                            live_tags_node = osm_live_query.NodeGet(
                                                way_nodes)
                                            cache_row = {
                                                'osm_id':
                                                int(way_nodes),
                                                'osm_live_tags':
                                                live_tags_node.get('tag'),
                                                'osm_version':
                                                live_tags_node.get('version'),
                                                'osm_user':
                                                live_tags_node.get('user'),
                                                'osm_user_id':
                                                live_tags_node.get('uid'),
                                                'osm_changeset':
                                                live_tags_node.get(
                                                    'changeset'),
                                                'osm_timestamp':
                                                live_tags_node.get(
                                                    'timestamp'),
                                                'osm_object_type':
                                                OSM_object_type.node,
                                                'osm_lat':
                                                live_tags_node.get('lat'),
                                                'osm_lon':
                                                live_tags_node.get('lon'),
                                                'osm_nodes':
                                                None
                                            }
                                            get_or_create_cache(
                                                session, POI_OSM_cache,
                                                **cache_row)
                                        break
                                    else:
                                        logging.warning(
                                            'Download of external data has failed.'
                                        )
                                else:
                                    data.at[i,
                                            'osm_live_tags'] = cached_way.get(
                                                'osm_live_tags')
                                    break
                            session.commit()
                        # Download OSM POI node live tags
                        elif osm_node == OSM_object_type.node:
                            for rtc in range(0, RETRY):
                                logging.info(
                                    'Downloading OSM live tags to this node: %s.',
                                    osm_id)
                                cached_node = db.query_from_cache(
                                    osm_id, osm_node)
                                if cached_node is None:
                                    live_tags_container = osm_live_query.NodeGet(
                                        osm_id)
                                    if live_tags_container is not None:
                                        data.at[
                                            i,
                                            'osm_live_tags'] = live_tags_container.get(
                                                'tag')
                                        cache_row = {
                                            'osm_id':
                                            int(osm_id),
                                            'osm_live_tags':
                                            live_tags_container.get('tag'),
                                            'osm_version':
                                            live_tags_container.get('version'),
                                            'osm_user':
                                            live_tags_container.get('user'),
                                            'osm_user_id':
                                            live_tags_container.get('uid'),
                                            'osm_changeset':
                                            live_tags_container.get(
                                                'changeset'),
                                            'osm_timestamp':
                                            live_tags_container.get(
                                                'timestamp'),
                                            'osm_object_type':
                                            osm_node,
                                            'osm_lat':
                                            live_tags_container.get('lat'),
                                            'osm_lon':
                                            live_tags_container.get('lon'),
                                            'osm_nodes':
                                            None
                                        }
                                        get_or_create_cache(
                                            session, POI_OSM_cache,
                                            **cache_row)
                                        break
                                    else:
                                        logging.warning(
                                            'Download of external data has failed.'
                                        )
                                else:
                                    data.at[i,
                                            'osm_live_tags'] = cached_node.get(
                                                'osm_live_tags')
                                    break
                            session.commit()
                        elif osm_node == OSM_object_type.relation:
                            for rtc in range(0, RETRY):
                                logging.info(
                                    'Downloading OSM live tags to this relation: %s.',
                                    osm_id)
                                # NOTE(review): abs() here presumably undoes
                                # the negated relation ids used in the osm2pgsql
                                # schema - confirm against the importer config.
                                live_tags_container = osm_live_query.RelationGet(
                                    abs(osm_id))
                                if live_tags_container is not None:
                                    data.at[
                                        i,
                                        'osm_live_tags'] = live_tags_container.get(
                                            'tag')
                                    break
                                else:
                                    logging.warning(
                                        'Download of external data has failed.'
                                    )
                            session.commit()
                        else:
                            logging.warning('Invalid state for live tags.')

                    except Exception as e:
                        logging.warning(
                            'There was an error during OSM request: %s.', e)
                        logging.exception('Exception occurred')
                        # NOTE(review): cached_node is only bound in the
                        # node branch - this line raises NameError (or shows a
                        # stale value) when the failure happened in the way or
                        # relation branch. Should likely be guarded.
                        logging.warning('Live tag is: {}'.format(
                            cached_node.get('osm_live_tags')))
                # This is a new POI
                else:
                    # This is a new POI - will add fix me tag to the new items.
                    data.at[i, 'poi_new'] = True
                    # Get the first character of then name of POI and generate a floating number between 0 and 1
                    # for a PostGIS function: https://postgis.net/docs/ST_LineInterpolatePoint.html
                    # If there is more than one POI in a building this will try to do a different location and
                    # not only on center or not only on edge
                    ib = row.get('poi_name')
                    if ib is not None:
                        ibp = 1 - (((ord(ib[0]) // 16) + 1) / 17)
                    else:
                        ibp = 0.50
                    # Refine postcode
                    osm_bulding_q = db.query_osm_building_poi_gpd(
                        row.get('poi_lon'),
                        row.get('poi_lat'),
                        row.get('poi_city'),
                        row.get('poi_postcode'),
                        row.get('poi_addr_street'),
                        row.get('poi_addr_housenumber'),
                        in_building_percentage=ibp)
                    if osm_bulding_q is not None:
                        # NOTE(review): the trailing comma after this
                        # logging.info() call turns the statement into a
                        # one-element tuple expression - harmless but likely
                        # unintended.
                        logging.info(
                            'Relocating POI coordinates to the building with same address: %s %s, %s %s',
                            row.get('poi_lat'), row.get('poi_lon'),
                            osm_bulding_q.get('lat')[0],
                            osm_bulding_q.get('lon')[0]),
                        row['poi_lat'], row['poi_lon'] = osm_bulding_q.get(
                            'lat')[0], osm_bulding_q.get('lon')[0]
                    else:
                        logging.info(
                            'The POI is already in its building or there is no building match. \
                            Keeping POI coordinates as is as.')
                    if row['preserve_original_post_code'] is not True:
                        postcode = query_postcode_osm_external(
                            config.get_geo_prefer_osm_postcode(), session,
                            data.at[i, 'poi_lon'], data.at[i, 'poi_lat'],
                            row.get('poi_postcode'))
                        if postcode != row.get('poi_postcode'):
                            logging.info('Changing postcode from %s to %s.',
                                         row.get('poi_postcode'), postcode)
                            data.at[i, 'poi_postcode'] = postcode
                    else:
                        logging.info('Preserving original postcode %s',
                                     row.get('poi_postcode'))
                    logging.info(
                        'New %s (not %s) type: %s POI: %s %s, %s %s (%s)',
                        row.get('poi_search_name'),
                        row.get('poi_search_avoid_name'), row.get('poi_type'),
                        row.get('poi_postcode'), row.get('poi_city'),
                        row.get('poi_addr_street'),
                        row.get('poi_addr_housenumber'),
                        row.get('poi_conscriptionnumber'))
            except Exception as e:
                # Per-row failures are logged and the loop continues.
                logging.error(e)
                logging.error(row)
                logging.exception('Exception occurred')

        session.commit()
        return data
    except Exception as e:
        logging.error(e)
        logging.exception('Exception occurred')
Exemple #12
0
    def process(self):
        """Download and parse the CBA / Príma store list, then insert POIs.

        The store data is embedded in the page as a JavaScript variable
        (``boltok_nyers``); the matching ``<script>`` tag is located, the
        JavaScript wrapping is stripped and the remaining JSON is parsed.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        insert_data = []
        if soup is not None:
            # parse the html using beautiful soap and store in variable `soup`
            # Raw string: '\s' in a plain literal is an invalid escape
            # (DeprecationWarning, future SyntaxError).
            pattern = re.compile(r'^\s*var\s*boltok_nyers.*')
            script = soup.find('script', text=pattern)
            m = pattern.match(script.get_text())
            data = m.group(0)
            data = clean_javascript_variable(data, 'boltok_nyers')
            text = json.loads(data)
            for poi_data in text:
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    poi_data['A_CIM'])
                city = clean_city(poi_data['A_VAROS'])
                postcode = poi_data['A_IRSZ'].strip()
                branch = poi_data['P_NAME'].strip()
                # Príma branded shops get their own POI code.
                name = 'Príma' if 'Príma' in branch else 'CBA'
                code = 'huprimacon' if 'Príma' in branch else 'hucbacon'
                website = None
                nonstop = None
                # Opening and closing hours arrive in numbered per-day fields
                # (PS_OPEN_FROM_1..7 / PS_OPEN_TO_1..7).
                # NOTE(review): the field-number to variable mapping
                # (2 -> th, 4 -> tu) follows the ordering used throughout
                # this file; presumably POI_COLS uses the same order - verify.
                opens = [
                    clean_opening_hours_2(poi_data['PS_OPEN_FROM_{}'.format(d)])
                    if poi_data['PS_OPEN_FROM_{}'.format(d)] is not None else None
                    for d in range(1, 8)
                ]
                closes = [
                    clean_opening_hours_2(poi_data['PS_OPEN_TO_{}'.format(d)])
                    if poi_data['PS_OPEN_TO_{}'.format(d)] is not None else None
                    for d in range(1, 8)
                ]
                mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o = opens
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c = closes
                original = poi_data['A_CIM']
                lat, lon = check_hu_boundary(poi_data['PS_GPS_COORDS_LAT'],
                                             poi_data['PS_GPS_COORDS_LNG'])
                geom = check_geom(lat, lon)
                postcode = query_postcode_osm_external(
                    self.prefer_osm_postcode, self.session, lat, lon, postcode)
                ref = None
                if 'PS_PUBLIC_TEL' in poi_data and poi_data[
                        'PS_PUBLIC_TEL'] != '':
                    phone = clean_phone(poi_data['PS_PUBLIC_TEL'])
                else:
                    phone = None
                if 'PS_PUBLIC_EMAIL' in poi_data and poi_data[
                        'PS_PUBLIC_EMAIL'] != '':
                    email = poi_data['PS_PUBLIC_EMAIL']
                else:
                    email = None
                insert_data.append([
                    code, postcode, city, name, branch, website, original,
                    street, housenumber, conscriptionnumber, ref, phone, email,
                    geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                    mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
                ])
            if len(insert_data) < 1:
                logging.warning('Resultset is empty. Skipping ...')
            else:
                df = pd.DataFrame(insert_data)
                df.columns = POI_COLS
                insert_poi_dataframe(self.session, df)
 def process(self):
     """Download and parse the Avia fuel-station list, then insert POIs.

     The station list is embedded in the page as a JavaScript ``markers``
     variable; the wrapping is stripped and the payload parsed as JSON.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # parse the html using beautiful soap and store in variable `soup`
         # Raw string: '\s' in a plain literal is an invalid escape
         # (DeprecationWarning, future SyntaxError).
         pattern = re.compile(r'var\s*markers\s*=\s*((.*\n)*\]\;)',
                              re.MULTILINE)
         script = soup.find('script', text=pattern)
         m = pattern.search(script.get_text())
         data = m.group(0)
         # The JavaScript literal uses single quotes; JSON needs double.
         data = data.replace("'", '"')
         data = clean_javascript_variable(data, 'markers')
         text = json.loads(data)
         for poi_data in text:
             # Reset the address fields for every record.  Previously they
             # were only bound inside the conditional below, so a record
             # without an address raised UnboundLocalError on the first
             # iteration or silently reused the previous record's address.
             postcode = city = street = housenumber = conscriptionnumber = None
             if poi_data['cim'] is not None and poi_data['cim'] != '':
                 postcode, city, street, housenumber, conscriptionnumber = extract_all_address(
                     poi_data['cim'])
             name = 'Avia'
             code = 'huaviafu'
             branch = None
             # Fall back to the marker title when the address had no city.
             if city is None:
                 city = poi_data['title']
             ref = poi_data['kutid'] if poi_data[
                 'kutid'] is not None and poi_data['kutid'] != '' else None
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             website = '/toltoallomas/?id={}'.format(str(poi_data['kutid'])) if poi_data['kutid'] is not None and \
                                                                                poi_data['kutid'] != '' else None
             nonstop = None
             # No opening-hours data is available from this source.
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data['cim']
             if 'tel' in poi_data and poi_data['tel'] != '':
                 phone = clean_phone(poi_data['tel'])
             else:
                 phone = None
             if 'email' in poi_data and poi_data['email'] != '':
                 email = clean_email(poi_data['email'])
             else:
                 email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
Exemple #14
0
 def process(self):
     """Download and parse the Tesco store list, then insert POIs.

     The store data is embedded as JSON in a ``data-stores`` HTML attribute.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # parse the html using beautiful soap and store in variable `soup`
         # script = soup.find('div', attrs={'data-stores':True})
         script = soup.find(attrs={'data-stores': True})
         text = json.loads(script['data-stores'])
         for poi_data in text:
             # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             city = clean_city(poi_data['city'])
             branch = poi_data['name']
             # Derive brand name and POI code from the store name.
             if 'xpres' in poi_data['name']:
                 name = 'Tesco Expressz'
                 code = 'hutescoexp'
             elif 'xtra' in poi_data['name']:
                 name = 'Tesco Extra'
                 code = 'hutescoext'
             else:
                 name = 'Tesco'
                 code = 'hutescosup'
             website = poi_data['url']
             nonstop = None
             # Opening hours are a JSON object keyed by day number
             # ('0' = Sunday); each value is an [open, close] pair.
             # NOTE(review): the day-key to variable mapping ('2' -> th,
             # '4' -> tu) follows the ordering used throughout this file;
             # presumably POI_COLS uses the same order - verify.
             opening = json.loads(poi_data['opening'])
             day_keys = ('1', '2', '3', '4', '5', '6', '0')
             mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o = (opening[d][0]
                                                         for d in day_keys)
             mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c = (opening[d][1]
                                                         for d in day_keys)
             lat, lon = check_hu_boundary(poi_data['gpslat'],
                                          poi_data['gpslng'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, None)
             original = poi_data['address']
             ref = None
             if 'phone' in poi_data and poi_data['phone'] != '':
                 phone = clean_phone(poi_data['phone'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
Example #15
0
 def process(self):
     """Fetch the Foxpost parcel-locker JSON feed and insert its POIs."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         text = json.loads(soup.get_text())
         # Hungarian day keys of poi_data['open'] in Mon..Sun order; the
         # resulting tuples unpack into the (mo, th, we, tu, fr, sa, su)
         # variable convention used by POI_COLS throughout this file.
         day_keys = ('hetfo', 'kedd', 'szerda', 'csutortok',
                     'pentek', 'szombat', 'vasarnap')
         for poi_data in text:
             name = 'Foxpost'
             code = 'hufoxpocso'
             postcode = poi_data['zip'].strip()
             street, housenumber, conscriptionnumber = \
                 extract_street_housenumber_better_2(poi_data['street'])
             city = clean_city(poi_data['city'])
             branch = poi_data['name']
             website = None
             nonstop = None
             # One (open, close) pair per day; None stays (None, None).
             hours = []
             for day in day_keys:
                 raw_hours = poi_data['open'][day]
                 if raw_hours is not None:
                     hours.append(clean_opening_hours(raw_hours))
                 else:
                     hours.append((None, None))
             (mo_o, mo_c), (th_o, th_c), (we_o, we_c), (tu_o, tu_c), \
                 (fr_o, fr_c), (sa_o, sa_c), (su_o, su_c) = hours
             original = poi_data['address']
             ref = None
             lat, lon = check_hu_boundary(poi_data['geolat'],
                                          poi_data['geolng'])
             geom = check_geom(lat, lon)
             # Prefer a postcode resolved from OSM when configured to.
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if not insert_data:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
    def process(self):
        """Parse the locally cached Kulcs patika JSON feed and insert POIs.

        NOTE(review): the original download step (save_downloaded_soup with
        POST_DATA) was left commented out upstream; this version reads the
        feed straight from the download cache file instead.
        """
        # Explicit encoding: the feed contains Hungarian accented text, so
        # decoding must not depend on the platform default encoding.
        with open(os.path.join(self.download_cache, self.filename), 'r',
                  encoding='utf-8') as f:
            insert_data = []
            text = json.load(f)
            for poi_data in text:
                street, housenumber, conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data['cim'])
                # Chain pharmacies get the fixed brand name with the shop
                # title as branch; independent names are kept as-is.
                if 'Kulcs patika' not in poi_data['nev']:
                    name = poi_data['nev'].strip()
                    branch = None
                else:
                    name = 'Kulcs patika'
                    branch = poi_data['nev'].strip()
                code = 'hukulcspha'
                website = poi_data['link'].strip() \
                    if poi_data['link'] is not None else None
                nonstop = None
                # This feed provides no opening hours.
                mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
                mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
                city = clean_city(poi_data['helyseg'])
                lat, lon = check_hu_boundary(
                    poi_data['marker_position']['latitude'],
                    poi_data['marker_position']['longitude'])
                geom = check_geom(lat, lon)
                # Prefer a postcode resolved from OSM when configured to.
                postcode = query_postcode_osm_external(
                    self.prefer_osm_postcode, self.session, lat, lon,
                    poi_data['irsz'].strip())
                original = poi_data['cim']
                ref = None
                phone = None
                email = None
                insert_data.append([
                    code, postcode, city, name, branch, website, original,
                    street, housenumber, conscriptionnumber, ref, phone, email,
                    geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                    mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
                ])
            if len(insert_data) < 1:
                logging.warning('Resultset is empty. Skipping ...')
            else:
                df = pd.DataFrame(insert_data)
                df.columns = POI_COLS
                insert_poi_dataframe(self.session, df)