Esempio n. 1
0
 def process(self):
     """Load the BENU pharmacy feed and insert its POIs into the database.

     The feed is fetched with ``save_downloaded_soup`` (given ``self.link``
     and a file path below ``self.download_cache``); the text of the
     returned soup is decoded as a JSON list. One row per entry is built in
     the order ``POI_COLS`` is applied to and the resulting frame is handed
     to ``insert_poi_dataframe``. Nothing is done when the download yields
     ``None``; an empty feed only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['street'])
         title = poi_data['title']
         if 'BENU Gyógyszertár' in title:
             # Chain members share one name; the feed title becomes the branch.
             name = 'Benu gyógyszertár'
             branch = title.strip()
         else:
             name = title.strip()
             branch = None
         description = poi_data['description']
         # The website is the description without its first 19 characters.
         # Slicing is skipped for a missing description, which previously
         # raised TypeError on None.
         website = description.strip()[19:] if description is not None else None
         city = clean_city(poi_data['city'])
         postcode = poi_data['postal_code'].strip()
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         if 'phone' in poi_data and poi_data['phone'] != '':
             phone = clean_phone(poi_data['phone'])
         else:
             phone = None
         row = [
             'hubenupha', postcode, city, name, branch, website,
             poi_data['street'],  # original address text
             street, housenumber, conscriptionnumber,
             None,  # ref
             phone,
             None,  # email
             geom,
             None,  # nonstop
         ]
         # The seven opening and seven closing time columns stay empty.
         row.extend([None] * 14)
         insert_data.append(row)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
    def process(self):
        """Import MagNet Bank branches and ATMs from the JSON file at ``self.link``.

        Each entry of the file's ``result`` list that carries an address is
        classified by its ``type`` field ('1' is handled as an ATM, '0' and
        '2' as a bank branch) and copied into ``self.data``.
        ``self.data.add()`` is called for every entry, including those
        without an address. Any exception is logged and swallowed.
        """
        try:
            if not self.link:
                return
            with open(self.link, 'r') as f:
                text = json.load(f)
                for poi_data in text['result']:
                    address = poi_data.get('address')
                    if address is not None:
                        kind = poi_data.get('type')
                        if kind == '1':
                            self.data.name = 'MagNet Bank ATM'
                            self.data.code = 'humagnatm'
                            self.data.public_holiday_open = True
                        elif kind in ('0', '2'):
                            self.data.name = 'MagNet Bank'
                            self.data.code = 'humagnbank'
                            self.data.public_holiday_open = False
                            self.data.email = poi_data.get('email')
                            self.data.phone = '+36 1 428 8888'
                        else:
                            logging.info('Unknow type! ({})'.format(kind))
                        (self.data.postcode, self.data.city, self.data.street,
                         self.data.housenumber,
                         self.data.conscriptionnumber) = extract_all_address(address)
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('lon'))
                        self.data.original = address
                    # Runs for every entry, with or without an address.
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Import Penny markets from the downloaded JSON feed.

        The payload returned by ``save_downloaded_soup`` is decoded as JSON
        and every entry of its ``markets`` list is copied into ``self.data``
        and stored with ``self.data.add()``. Any exception is logged and
        swallowed, which ends the import at the failing entry.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(soup)['markets']:
                self.data.name = 'Penny'
                self.data.code = 'hupennysup'
                address = poi_data['address']
                self.data.postcode = address['zip'].strip()
                # Only the part before the first comma is parsed as the street.
                street_part = address['street'].split(',')[0]
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    address['latitude'], address['longitude'])
                (self.data.street, self.data.housenumber,
                 self.data.conscriptionnumber) = extract_street_housenumber_better_2(
                     street_part.title())
                if poi_data.get('phone', '') != '':
                    self.data.phone = clean_phone_to_str(poi_data['phone'])
                if poi_data.get('id', '') != '':
                    self.data.ref = poi_data['id'].strip()
                self.data.public_holiday_open = False
                # TODO: Parsing opening_hours from datasource
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Import Tom Market stores from the downloaded JSON feed.

        The feed is fetched through ``save_downloaded_soup``; every entry of
        its ``stores`` list is copied field by field into ``self.data`` and
        stored with ``self.data.add()``. A failure inside one store is logged
        together with the offending record and does not stop the remaining
        stores; any other failure is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(str(soup))
                for poi_data in text['stores']:
                    try:
                        self.data.code = 'hutommacon'
                        # NOTE(review): index 2 of 'name' is used as the
                        # reference — confirm the shape of this field.
                        ref = poi_data.get('name')[2]
                        if ref is not None and ref != '':
                            self.data.ref = ref
                        website = poi_data.get('website')
                        if website is not None and website != '':
                            self.data.website = website
                        else:
                            self.data.website = 'https://tommarket.hu'
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('long'))
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                poi_data.get('address'))
                        postcode = poi_data.get('zip')
                        if postcode is not None and postcode != '':
                            self.data.postcode = postcode
                        self.data.original = poi_data.get('address')
                        settlement = poi_data.get('settlement')
                        if settlement is not None and settlement != '':
                            self.data.city = clean_city(settlement)
                        else:
                            # No settlement in the feed: look the city up
                            # from the coordinates instead.
                            self.data.city = query_osm_city_name_gpd(
                                self.session, self.data.lat, self.data.lon)
                        phone = poi_data.get('phone')
                        if phone is not None and phone != '':
                            self.data.phone = clean_phone_to_str(phone)
                        email = poi_data.get('email')
                        if email is not None and email != '':
                            # Stored as e-mail; this value previously
                            # overwrote the phone number.
                            self.data.email = email.strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
Esempio n. 5
0
 def process(self):
     """Load the MOL Bubi station list (XML) and insert it into the database.

     The XML text returned by ``save_downloaded_xml`` is parsed and one row
     is built for every ``place`` element, in the column order
     ``POI_COLS`` is applied to. The station ``name`` attribute is split on
     '-': the first part is stored as the reference, the second as the
     branch. An empty result only logs a warning.
     """
     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for place in root.iter('place'):
         ref = None
         branch = None
         station_name = place.attrib.get('name')
         if station_name is not None:
             parts = station_name.split('-')
             ref = parts[0].strip()
             # Names without a dash carry no branch part.
             if len(parts) > 1:
                 branch = parts[1].strip()
         lat, lon = check_hu_boundary(place.attrib['lat'].replace(',', '.'),
                                      place.attrib['lng'].replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                None)
         row = [
             'hububibir', postcode, 'Budapest', 'MOL Bubi', branch,
             None,  # website
             None,  # original
             None,  # street
             None,  # housenumber
             None,  # conscriptionnumber
             ref,   # kept; it used to be reset to None before insertion
             None,  # phone
             None,  # email
             geom,
             True,  # nonstop
         ]
         # The seven opening and seven closing time columns stay empty.
         row.extend([None] * 14)
         insert_data.append(row)
     logging.debug('Collected %d MOL Bubi stations', len(insert_data))
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
Esempio n. 6
0
 def process(self):
     """Import CIB bank branches or ATMs from the JSON file at ``self.link``.

     Does nothing when ``self.link`` is falsy. Each entry of the file's
     ``results`` list is a mapping whose first value holds the POI record;
     one row per entry is built in the order ``POI_COLS`` is applied to and
     the frame is passed to ``insert_poi_dataframe``. An empty result only
     logs a warning.
     """
     if not self.link:
         return
     with open(self.link, 'r') as f:
         rows = []
         text = json.load(f)
         for poi_data in text['results']:
             record = poi_data[next(iter(poi_data))]
             is_bank = self.name == 'CIB bank'
             name = 'CIB bank' if is_bank else 'CIB'
             code = 'hucibbank' if is_bank else 'hucibatm'
             address = record['address']
             postcode, city, street, housenumber, conscriptionnumber = \
                 extract_all_address(address)
             lat, lon = check_hu_boundary(record['latitude'],
                                          record['longitude'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             row = [
                 code, postcode, city, name,
                 None,  # branch
                 None,  # website
                 address,  # original address text
                 street, housenumber, conscriptionnumber,
                 None,  # ref
                 None,  # phone
                 None,  # email
                 geom,
                 None,  # nonstop
             ]
             # The seven opening and seven closing time columns stay empty.
             row.extend([None] * 14)
             rows.append(row)
         if not rows:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(rows)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
    def process(self):
        """Import OIL! fuel stations from the downloaded JSON feed.

        The payload from ``save_downloaded_soup`` is decoded as a JSON list;
        each entry's non-empty fields are copied into ``self.data`` and the
        record is stored with ``self.data.add()``. A failure inside one entry
        is logged with the offending record and the loop moves on; any other
        failure is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(soup):
                try:
                    self.data.name = 'OIL!'
                    self.data.code = 'huoilfu'
                    postcode = poi_data.get('zip')
                    if postcode not in (None, ''):
                        self.data.postcode = postcode.strip()
                    city = poi_data.get('city')
                    if city not in (None, ''):
                        self.data.city = clean_city(city)
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    address = poi_data.get('address')
                    if address not in (None, ''):
                        self.data.original = address
                        (self.data.street, self.data.housenumber,
                         self.data.conscriptionnumber) = \
                            extract_street_housenumber_better_2(address)
                    phone = poi_data.get('phone')
                    if phone not in (None, ''):
                        self.data.phone = clean_phone_to_str(phone)
                    self.data.fuel_octane_95 = True
                    self.data.fuel_diesel = True
                    station_id = poi_data.get('id')
                    if station_id not in (None, ''):
                        self.data.ref = station_id.strip()
                    url = poi_data.get('url')
                    if url not in (None, ''):
                        self.data.website = url.strip()
                    else:
                        self.data.website = 'https://www.oil-benzinkutak.hu'
                    store = poi_data.get('store')
                    if store not in (None, ''):
                        # The branch is whatever follows the first space.
                        pieces = store.split(' ', 1)
                        self.data.branch = pieces[1].strip().capitalize()
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import CBA and Príma shops from the downloaded page.

        The JavaScript variable ``boltok_nyers`` is extracted from the
        downloaded soup and decoded as a JSON list. Every entry is copied into
        ``self.data`` (a branch name containing 'Príma' selects the Príma
        name and code, anything else is CBA), including the per-day opening
        and closing values, and stored with ``self.data.add()``. Any
        exception is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            shops = json.loads(
                extract_javascript_variable(soup, 'boltok_nyers'))
            for poi_data in shops:
                self.data.city = clean_city(poi_data.get('A_VAROS'))
                self.data.postcode = poi_data.get('A_IRSZ').strip()
                self.data.branch = poi_data.get('P_NAME').strip()
                if 'Príma' in self.data.branch:
                    self.data.name = 'Príma'
                else:
                    self.data.name = 'CBA'
                if 'Príma' in self.data.branch:
                    self.data.code = 'huprimacon'
                else:
                    self.data.code = 'hucbacon'
                # Feed keys are numbered 1..7 while day indexes run 0..6.
                for day in range(7):
                    opens = poi_data.get('PS_OPEN_FROM_{}'.format(day + 1))
                    self.data.day_open(
                        day,
                        None if opens is None else clean_opening_hours_2(opens))
                    closes = poi_data.get('PS_OPEN_TO_{}'.format(day + 1))
                    self.data.day_close(
                        day,
                        None if closes is None else clean_opening_hours_2(closes))
                address = poi_data.get('A_CIM')
                self.data.original = address
                self.data.lat, self.data.lon = check_hu_boundary(
                    poi_data.get('PS_GPS_COORDS_LAT'),
                    poi_data.get('PS_GPS_COORDS_LNG'))
                (self.data.street, self.data.housenumber,
                 self.data.conscriptionnumber) = \
                    extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                phone = poi_data.get('PS_PUBLIC_TEL', '')
                self.data.phone = clean_phone_to_str(phone) if phone != '' else None
                email = poi_data.get('PS_PUBLIC_EMAIL', '')
                self.data.email = email if email != '' else None
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 9
0
    def process(self):
        """Import Yves Rocher shops from the downloaded JSON feed.

        Entries of the feed's ``list`` whose ``country_id`` is not 3 are
        skipped; the rest are copied into ``self.data`` and stored with
        ``self.data.add()``. When both a phone and a mobile number are
        present they are joined with ';'. A failure inside one entry is
        logged with the offending record and the loop moves on; any other
        failure is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link),
                                        os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is None:
                return
            text = json.loads(str(soup))
            for poi_data in text.get('list'):
                try:
                    if poi_data.get('country_id') != 3:
                        continue
                    self.data.name = 'Yves Rocher'
                    self.data.code = 'huyvesrcos'
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('latitude'), poi_data.get('longitude'))
                    self.data.website = 'https://www.yves-rocher.hu{}/'.format(
                        poi_data.get('request_path'))
                    hours = poi_data.get('hours')
                    for day in range(7):
                        if day not in hours:
                            continue
                        self.data.day_open(day, hours[day]['hour_from'])
                        self.data.day_close(day, hours[day]['hour_to'])
                    self.data.postcode = poi_data.get('zip')
                    address = poi_data.get('address')
                    (self.data.street, self.data.housenumber,
                     self.data.conscriptionnumber) = \
                        extract_street_housenumber_better_2(address)
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.original = address
                    phone = poi_data.get('phone')
                    if phone is not None and phone != '':
                        self.data.phone = clean_phone_to_str(phone)
                    mobile = poi_data.get('mobile')
                    if mobile is not None and mobile != '':
                        if self.data.phone is not None:
                            # Append the mobile number to the existing one.
                            self.data.phone = '{};{}'.format(
                                self.data.phone, clean_phone_to_str(mobile))
                        else:
                            self.data.phone = clean_phone_to_str(mobile)
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')
        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
Esempio n. 10
0
 def process(self):
     """Load the MOL fuel station feed and insert its POIs into the database.

     The feed is requested through ``save_downloaded_soup`` with
     ``POST_DATA`` as third argument; the text of the returned soup is
     decoded as a JSON list. One row per entry is built in the order
     ``POI_COLS`` is applied to and the frame is handed to
     ``insert_poi_dataframe``. Nothing is done when the download yields
     ``None``; an empty feed only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     # Identity test against None instead of the former `!= None`.
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         postcode = poi_data['postcode'].strip()
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         city = clean_city(poi_data['city'])
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         row = [
             'humolfu', postcode, city, 'MOL',
             None,  # branch
             None,  # website
             poi_data['address'],  # original address text
             street, housenumber, conscriptionnumber,
             None,  # ref
             None,  # phone
             None,  # email
             geom,
             None,  # nonstop
         ]
         # The seven opening and seven closing time columns stay empty.
         row.extend([None] * 14)
         insert_data.append(row)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
Esempio n. 11
0
    def process(self):
        """Import Hungarian Pepco shops from the downloaded JSON feed.

        Every entry of the feed's ``data`` list is considered. Entries
        without a ``city`` key, with an empty city, or with a city that
        ``query_osm_city_name`` does not know are skipped; the rest are
        copied into ``self.data`` and stored with ``self.data.add()``. Any
        exception is logged (traceback plus message) and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['data']:
                    # The Pepco dataset contains all European data. Since the
                    # program cannot handle POIs outside Hungary (so far),
                    # only Hungarian POIs are selected. In fact this depends
                    # on the OSM extract, but currently only the Hungarian
                    # OSM extract is used.
                    if 'city' not in poi_data:
                        continue
                    city = poi_data['city']
                    if city == '' or query_osm_city_name(
                            self.session, city) is None:
                        continue
                    self.data.city = clean_city(city)
                    self.data.name = 'Pepco'
                    self.data.code = 'hupepcoclo'
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(
                            poi_data['coordinates']['lat'], poi_data['coordinates']['lng'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('streetAddress'))
                    self.data.original = poi_data.get('streetAddress')
                    self.data.postcode = poi_data.get('postalCode')
                    # Assign opening hours.
                    # NOTE(review): `i in opening` tests integer membership —
                    # confirm the container type of 'openingHours'.
                    opening = poi_data['openingHours']
                    for i in range(0, 7):
                        if i in opening:
                            self.data.day_open(i, opening[i]['from'])
                            self.data.day_close(i, opening[i]['to'])
                    # Assign additional information.
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('phoneNumber'))
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            # Log the exception once; the former nested call also logged a
            # spurious "None".
            logging.error(e)
Esempio n. 12
0
    def process(self):
        """Import Kulcs patika pharmacies from the cached JSON file.

        Nothing is done when ``self.link`` is falsy. The file
        ``self.filename`` below ``self.download_cache`` is loaded as JSON and
        the loaded value is decoded a second time with ``strict=False``. Each
        entry is copied into ``self.data`` and stored with
        ``self.data.add()``. A failure inside one entry is logged with the
        offending record and the loop moves on; any other failure is logged
        and swallowed.
        """
        try:
            if not self.link:
                return
            # The data is read from the local cache file; the download step
            # via save_downloaded_soup is disabled here.
            cache_path = os.path.join(self.download_cache, self.filename)
            with open(cache_path, 'r') as f:
                text = json.load(f)
                if text is None:
                    return
                text = json.loads(text, strict=False)
                for poi_data in text:
                    try:
                        if 'Kulcs patika' in poi_data.get('nev'):
                            self.data.name = 'Kulcs patika'
                            self.data.branch = poi_data.get('nev').strip()
                        else:
                            self.data.name = poi_data.get('nev').strip()
                            self.data.branch = None
                        self.data.code = 'hukulcspha'
                        link = poi_data.get('link')
                        if link not in (None, ''):
                            self.data.website = link.strip()
                        city = poi_data.get('helyseg')
                        if city not in (None, ''):
                            self.data.city = clean_city(city)
                        position = poi_data.get('marker_position')
                        self.data.lat, self.data.lon = check_hu_boundary(
                            position['latitude'], position['longitude'])
                        address = poi_data.get('cim')
                        if address not in (None, ''):
                            self.data.original = address
                            (self.data.street, self.data.housenumber,
                             self.data.conscriptionnumber) = \
                                extract_street_housenumber_better_2(address)
                        postcode = poi_data.get('irsz')
                        if postcode not in (None, ''):
                            self.data.postcode = postcode.strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')
        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import Mobil Petrol fuel stations from the downloaded page.

        The JavaScript variable ``totem_stations`` is extracted from the
        downloaded soup and decoded as a JSON mapping; each of its values is
        copied into ``self.data`` and stored with ``self.data.add()``.
        Opening hours come from markers found in the ``services`` field:
        '0-24' marks a non-stop station, otherwise the first matching marker
        of a fixed list sets the same hours for all seven days. Any exception
        is logged and swallowed.
        """
        # Markers are checked in this order; the first match wins.
        known_hours = (
            ('6-22', '06:00', '22:00'),
            ('6-21', '06:00', '21:00'),
            ('5-22', '05:00', '22:00'),
            ('6-18', '06:00', '18:00'),
        )
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                text = json.loads(
                    extract_javascript_variable(soup, 'totem_stations'))
                for poi_data in text.values():
                    self.data.name = 'Mobil Petrol'
                    self.data.code = 'humobpefu'
                    self.data.website = poi_data.get('description')
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.original = poi_data.get('address')
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                                     poi_data['location']['lng'])
                    self.data.postcode = None
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get('address'))
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    self.data.public_holiday_open = False
                    services = poi_data.get('services')
                    if '0-24' in services:
                        self.data.nonstop = True
                        self.data.public_holiday_open = True
                    else:
                        # Reset for every station. The former locals() check
                        # let a station without a known marker inherit the
                        # hours of an earlier station.
                        open_from = None
                        open_to = None
                        for marker, opens, closes in known_hours:
                            if marker in services:
                                open_from, open_to = opens, closes
                                break
                        if open_from is not None and open_to is not None:
                            for i in range(0, 7):
                                self.data.day_open(i, open_from)
                                self.data.day_close(i, open_to)
                        self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
    def process(self):
        """Load K&H Bank branches or ATMs from a local JSON file and insert them.

        ``self.link`` is opened as a file and parsed as JSON.  Each item of its
        ``results`` list is a mapping whose first key holds the record with the
        coordinates and the address.  ``self.name`` selects between the bank
        and the ATM flavour.  The collected dataset is handed to
        ``insert_poi_dataframe``.  Exceptions are logged and not re-raised.
        """
        # Bound before the try block: the handler below logs the record being
        # processed, and a failure before the first record (unreadable file,
        # invalid JSON) used to raise a NameError inside the handler itself.
        poi_data = None
        try:
            if self.link:
                with open(self.link, 'r') as f:
                    text = json.load(f)
                    data = POIDataset()
                    for poi_data in text['results']:
                        first_element = next(iter(poi_data))
                        record = poi_data.get(first_element)
                        if self.name == 'K&H Bank':
                            data.name = 'K&H Bank'
                            data.code = 'hukhbank'
                            data.public_holiday_open = False
                        elif self.name == 'K&H Bank ATM':
                            data.name = 'K&H Bank ATM'
                            data.code = 'hukhatm'
                            data.public_holiday_open = True
                        # Only the ATM flavour is flagged as open around the clock.
                        data.nonstop = data.code == 'hukhatm'
                        data.lat, data.lon = check_hu_boundary(
                            record['latitude'], record['longitude'])
                        address = record['address']
                        if address is not None and address != '':
                            data.postcode, data.city, data.street, data.housenumber, data.conscriptionnumber = \
                                extract_all_address(address)
                            data.original = address
                        # NOTE(review): the phone number is read from the outer
                        # mapping, unlike the other fields - confirm against the
                        # feed layout.
                        phone_number = poi_data.get('phoneNumber')
                        if phone_number is not None and phone_number != '':
                            data.phone = clean_phone_to_str(phone_number)
                        else:
                            data.phone = None
                        data.add()
                    if data.lenght() < 1:
                        logging.warning('Resultset is empty. Skipping ...')
                    else:
                        insert_poi_dataframe(self.session, data.process())
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
            logging.error(poi_data)
Esempio n. 15
0
 def process(self):
     """Queue every Nemzeti Dohánybolt shop found in the downloaded JSON list.

     The list is requested through ``save_downloaded_soup`` with ``POST_DATA``;
     nothing is done when that call yields ``None``.
     """
     payload = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                    self.filetype, POST_DATA)
     if payload is None:
         return
     for shop in json.loads(payload):
         self.data.name = 'Nemzeti Dohánybolt'
         self.data.code = 'hunemdotob'
         self.data.postcode = shop.get('postcode').strip()
         self.data.city = clean_city(shop['city'])
         raw_address = shop['address']
         self.data.original = raw_address
         latitude, longitude = check_hu_boundary(shop['lat'], shop['lng'])
         self.data.lat, self.data.lon = latitude, longitude
         # The raw address is kept above; here it is split into its parts.
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(raw_address)
         self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
             street, housenumber, conscriptionnumber
         self.data.public_holiday_open = False
         self.data.add()
Esempio n. 16
0
    def process(self):
        """Queue every Spar store of the downloaded JSON list in ``self.data``.

        The store flavour (Spar Expressz, Interspar, Spar, DeSpar) is derived
        from marker substrings of the store name.  Exceptions are logged and
        not re-raised.
        """
        # (marker in the raw store name, POI name, POI code); first match wins.
        flavours = (
            ('xpres', 'Spar Expressz', 'husparecon'),
            ('INTER', 'Interspar', 'husparisup'),
            ('market', 'Spar', 'husparsup'),
            ('DESPAR', 'DeSpar', 'huspardcon'),
        )
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for store in json.loads(soup):
                raw_name = store['name']
                for marker, poi_name, poi_code in flavours:
                    if marker in raw_name:
                        break
                else:
                    # No marker matched: a plain Spar supermarket.
                    poi_name, poi_code = 'Spar', 'husparsup'
                self.data.name = poi_name
                self.data.code = poi_code
                # Normalise the capitalisation of the brand inside the store name.
                pretty_name = raw_name.replace('INTERSPAR', 'Interspar')
                pretty_name = pretty_name.replace('SPAR', 'Spar')
                ref_match = PATTERN_SPAR_REF.search(pretty_name)
                if ref_match is not None:
                    self.data.ref = ref_match.group(1).strip()
                else:
                    self.data.ref = None
                self.data.city = clean_city(store['city'])
                self.data.postcode = store.get('zipCode').strip()
                # The branch is the part of the name in front of the first '('.
                self.data.branch = pretty_name.split('(')[0].strip()
                self.data.website = store['pageUrl'].strip()
                latitude, longitude = check_hu_boundary(
                    store['latitude'], store['longitude'])
                self.data.lat, self.data.lon = latitude, longitude
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    store['address'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    street, housenumber, conscriptionnumber
                self.data.original = store['address']
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 17
0
    def process(self):
        """Queue every MOL fuel station of the downloaded JSON list in ``self.data``.

        The list is requested through ``save_downloaded_soup`` with
        ``POST_DATA``.  Service flags are derived from the ``servicesin`` field
        of each station.  Exceptions are logged and not re-raised.
        """
        # (attribute of self.data, marker looked up in ``servicesin``)
        service_flags = (
            ('truck', 'kamion_parkolo'),
            ('food', 'fresh_corner'),
            ('rent_lpg_bottles', 'pb'),
            ('fuel_adblue', 'adblue'),
            ('fuel_lpg', 'lpg'),
        )
        # Attributes switched on for every station regardless of the feed.
        always_available = ('fuel_octane_95', 'fuel_diesel', 'fuel_octane_100',
                            'fuel_diesel_gtl', 'compressed_air')
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype, POST_DATA)
            if soup is None:
                return
            for station in json.loads(soup):
                self.data.name = 'MOL'
                if " Sziget " in station.get('name'):
                    station_code = 'humolwfu'
                else:
                    station_code = 'humolfu'
                self.data.code = station_code
                self.data.postcode = station.get('postcode').strip()
                self.data.city = clean_city(station['city'])
                self.data.original = station['address']
                latitude, longitude = check_hu_boundary(
                    station['lat'], station['lng'])
                self.data.lat, self.data.lon = latitude, longitude
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    station['address'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    street, housenumber, conscriptionnumber
                self.data.public_holiday_open = True
                services = station.get('servicesin')
                for attribute, marker in service_flags:
                    setattr(self.data, attribute, marker in services)
                for attribute in always_available:
                    setattr(self.data, attribute, True)
                # NOTE(review): this overrides the True assigned above, so the
                # value that reaches add() is False - confirm which is intended.
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 18
0
    def process(self):
        """Import Mobiliti charging stations from the CSV file at ``self.link``.

        The file is read with pandas (``;`` separated, first row skipped) and
        every row is copied into ``self.data`` and committed with
        ``self.data.add()``.  Rows without GPS coordinates are skipped.
        Exceptions are logged and not re-raised.
        """
        try:
            frame = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
            for poi_data in frame.to_dict('records'):
                self.data.name = 'Mobiliti'
                self.data.code = 'humobilchs'
                self.data.ref = poi_data.get('Mobiliti azonosító')
                self.data.branch = poi_data.get('Töltőpont neve')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(
                        poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                coordinates = poi_data.get('GPS koordináták')
                # pandas reports an empty CSV cell as NaN, not None.  Testing
                # only for None let NaN reach ``.split`` and the resulting
                # AttributeError stopped the import at the first such row.
                if coordinates is None or pd.isna(coordinates):
                    continue
                self.data.lat, self.data.lon = coordinates.split(',')
                self.data.lat, self.data.lon = check_hu_boundary(
                    self.data.lat, self.data.lon)
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get(
                    'Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get(
                    'Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get(
                    'Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get(
                    'Teljesítmény (Type 2)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 19
0
    def process(self):
        """Load CIB Bank branches or ATMs from a local JSON file and insert them.

        ``self.link`` is opened as a file and parsed as JSON.  Only items of
        ``availableLocations`` whose ``locationStatus`` is ``IN_SERVICE`` are
        imported.  ``self.name`` equal to ``'CIB Bank'`` selects the bank
        flavour, anything else the ATM flavour.  The collected dataset is
        handed to ``insert_poi_dataframe``.  Exceptions are logged and not
        re-raised.
        """
        # Bound before the try block: the handler below logs the record being
        # processed, and a failure before the first record (unreadable file,
        # invalid JSON) used to raise a NameError inside the handler itself.
        poi_data = None
        try:
            if self.link:
                with open(self.link, 'r') as f:
                    text = json.load(f)
                    data = POIDataset()
                    for poi_data in text['availableLocations']:
                        if 'locationStatus' not in poi_data or poi_data['locationStatus'] != 'IN_SERVICE':
                            continue
                        if self.name == 'CIB Bank':
                            data.name = 'CIB Bank'
                            data.code = 'hucibbank'
                            data.public_holiday_open = False
                        else:
                            data.name = 'CIB Bank ATM'
                            data.code = 'hucibatm'
                            data.public_holiday_open = True
                        data.lat, data.lon = check_hu_boundary(
                            poi_data['location']['lat'],
                            poi_data['location']['lon'])
                        data.city = clean_city(poi_data['city'])
                        data.postcode = poi_data.get('zip').strip()
                        data.housenumber = poi_data['streetNo'].strip()
                        data.street = poi_data['streetName'].strip()
                        data.branch = poi_data['name']
                        if 'phone' in poi_data and poi_data['phone'] != '':
                            data.phone = clean_phone_to_str(
                                poi_data['phone'])
                        if 'email' in poi_data and poi_data['email'] != '':
                            data.email = poi_data['email'].strip()
                        data.original = poi_data['fullAddress']
                        data.add()
                if data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
            logging.error(poi_data)
Esempio n. 20
0
    def process(self):
        """Queue every Benu pharmacy of the downloaded JSON list in ``self.data``.

        A failure inside one record is logged together with that record and the
        loop goes on with the next one; a failure outside the loop is logged
        and not re-raised.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is None:
                return
            for pharmacy in json.loads(str(soup)):
                try:
                    if 'BENU Gyógyszertár' in pharmacy.get('title'):
                        # Branded pharmacy: generic name, title kept as branch.
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = pharmacy.get('title').strip()
                    else:
                        self.data.name = pharmacy.get('title').strip()
                        self.data.branch = None
                    self.data.code = 'hubenupha'
                    description = pharmacy.get('description')
                    website = None
                    if description is not None:
                        pu_match = PATTERN_FULL_URL.match(description)
                        if pu_match is not None:
                            website = pu_match.group(0).strip()
                    self.data.website = website
                    self.data.city = clean_city(pharmacy.get('city'))
                    self.data.postcode = pharmacy.get('postal_code').strip()
                    latitude, longitude = check_hu_boundary(pharmacy.get('lat'), pharmacy.get('lng'))
                    self.data.lat, self.data.lon = latitude, longitude
                    street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                        pharmacy.get('street'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        street, housenumber, conscriptionnumber
                    self.data.original = pharmacy.get('street')
                    has_phone = 'phone' in pharmacy and pharmacy.get('phone') != ''
                    if has_phone:
                        self.data.phone = clean_phone_to_str(pharmacy.get('phone'))
                    else:
                        self.data.phone = None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(pharmacy)
                    logging.exception('Exception occurred')
        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Queue every MOL Plugee charging station of the CSV at ``self.link``.

        The file is read with pandas (``;`` separated, first row skipped).
        Coordinates come from the ``X`` / ``Y`` columns with decimal commas
        replaced by dots.  Exceptions are logged and not re-raised.
        """
        # (attribute of self.data, CSV column) copied verbatim, in this order.
        socket_columns = (
            ('socket_chademo', 'Darab (CHAdeMO)'),
            ('socket_chademo_output', 'Teljesítmény (CHAdeMO)'),
            ('socket_type2_combo', 'Darab (CCS)'),
            ('socket_type2_combo_output', 'Teljesítmény (CCS)'),
            ('socket_type2_cable', 'Darab (Type 2)'),
            ('socket_type2_cable_output', 'Teljesítmény (Type 2)'),
            ('socket_type2', 'Darab (Type 2 – kábel nélkül)'),
            ('socket_type2_output', 'Teljesítmény (Type 2 – kábel nélkül)'),
            ('manufacturer', 'Gyártó'),
            ('model', 'Típus'),
            ('capacity', 'Kapacitás'),
        )
        try:
            frame = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
            if frame is not None:
                for row in frame.to_dict('records'):
                    self.data.name = 'MOL Plugee'
                    self.data.code = 'humolplchs'
                    self.data.ref = row.get('Azonosító')
                    self.data.postcode = row.get('Irányító szám')
                    self.data.city = clean_city(row.get('Település'))
                    street, housenumber, conscriptionnumber = \
                        extract_street_housenumber_better_2(row.get('Cím'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        street, housenumber, conscriptionnumber
                    self.data.original = row.get('Cím')
                    raw_lat = row.get('X')
                    raw_lng = row.get('Y')
                    latitude, longitude = check_hu_boundary(
                        raw_lat.replace(',', '.'), raw_lng.replace(',', '.'))
                    self.data.lat, self.data.lon = latitude, longitude
                    for attribute, column in socket_columns:
                        setattr(self.data, attribute, row.get(column))
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Queue every Rossmann shop found in the page's ``locations`` variable.

        Only the first entry of each location's ``addresses`` list is used.
        Opening hours are taken per weekday from ``business_hours``.
        Exceptions are logged and not re-raised.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype, None, self.verify_link)
            if soup is None:
                return
            # The shop list is the JSON value of the ``locations`` JavaScript variable.
            locations = json.loads(
                extract_javascript_variable(soup, 'locations'))
            for location in locations:
                shop = location['addresses'][0]
                self.data.name = 'Rossmann'
                self.data.code = 'hurossmche'
                self.data.city = clean_city(shop['city'])
                self.data.postcode = shop.get('zip').strip()
                business_hours = shop['business_hours']
                for day_index in range(0, 7):
                    day_hours = business_hours[WeekDaysLong(day_index).name.lower()]
                    if day_hours is None:
                        self.data.day_open_close(day_index, None, None)
                    else:
                        opening, closing = clean_opening_hours(day_hours)
                        self.data.day_open_close(day_index, opening, closing)
                latitude, longitude = check_hu_boundary(
                    shop['position'][0], shop['position'][1])
                self.data.lat, self.data.lon = latitude, longitude
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    shop['address'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    street, housenumber, conscriptionnumber
                self.data.original = shop['address']
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 23
0
    def process(self):
        """Queue every OBI store of the downloaded JSON document in ``self.data``.

        Stores are read from the ``stores`` entry.  Phone, store number, e-mail
        and path are copied only when the key exists and is not an empty
        string.  Exceptions are logged and not re-raised.
        """

        def filled(record, key):
            """Tell whether ``key`` is present in ``record`` with a non-empty-string value."""
            return key in record and record.get(key) != ''

        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            document = json.loads(soup)
            for store in document.get('stores'):
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                location = store['address']
                self.data.postcode = location['zip'].strip()
                self.data.city = clean_city(location['city'])
                self.data.original = location['street']
                latitude, longitude = check_hu_boundary(
                    location['lat'], location['lon'])
                self.data.lat, self.data.lon = latitude, longitude
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    location['street'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    street, housenumber, conscriptionnumber
                if filled(store, 'phone'):
                    self.data.phone = clean_phone_to_str(store.get('phone'))
                if filled(store, 'storeNumber'):
                    self.data.ref = store.get('storeNumber').strip()
                if filled(store, 'email'):
                    self.data.email = clean_email(store.get('email'))
                if filled(store, 'path'):
                    self.data.website = store.get('path')
                # TODO: opening hour parser for store.get('hours'), format is like:
                #  Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # public_holiday_open is left untouched until then.
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 24
0
    def process(self):
        """Queue every Jysk shop listed in the page's Drupal settings JSON.

        Shops are read from ``storesLocator`` / ``BuildCoordinates`` of the
        ``<script data-drupal-selector="drupal-settings-json">`` element.  A
        per-shop page is requested as well, but its content is not used here.
        Exceptions are logged and not re-raised.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is None:
                return
            settings_tag = soup.find(
                'script', {'data-drupal-selector': 'drupal-settings-json'})
            settings = json.loads(settings_tag.text, strict=False)
            for shop in settings['storesLocator']['BuildCoordinates']:
                self.data.name = 'Jysk'
                self.data.code = 'hujyskfur'
                latitude, longitude = check_hu_boundary(
                    shop.get('lat'), shop.get('lon'))
                self.data.lat, self.data.lon = latitude, longitude
                self.data.branch = shop.get('name')
                internal_id = shop.get('id')
                shop_url = '{}?storeId={}'.format(self.link, internal_id)
                shop_cache = os.path.join(
                    self.download_cache,
                    '{}.{}.html'.format(self.filename, internal_id))
                # The returned page is not parsed; the call is kept because it
                # is part of the original behaviour (presumably it fills the
                # download cache - verify against save_downloaded_soup).
                save_downloaded_soup(shop_url, shop_cache)
                # The same phone number is assigned to every shop.
                self.data.phone = '+36 1 700 8400'
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
Esempio n. 25
0
    def process(self):
        """Queue every Foxpost parcel locker of the downloaded JSON list.

        Opening hours are read per weekday from the ``open`` mapping, keyed by
        the lower-cased names of ``WeekDaysLongHUUnAccented``.  Exceptions are
        logged and not re-raised.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for locker in json.loads(soup):
                self.data.name = 'Foxpost'
                self.data.code = 'hufoxpocso'
                self.data.postcode = locker['zip'].strip()
                self.data.city = clean_city(locker['city'])
                self.data.branch = locker['name']
                opening_table = locker['open']
                for day_index in range(0, 7):
                    day_hours = opening_table[WeekDaysLongHUUnAccented(day_index).name.lower()]
                    if day_hours is None:
                        self.data.day_open_close(day_index, None, None)
                    else:
                        opening, closing = clean_opening_hours(day_hours)
                        self.data.day_open(day_index, opening)
                        self.data.day_close(day_index, closing)
                self.data.original = locker['address']
                latitude, longitude = check_hu_boundary(
                    locker['geolat'], locker['geolng'])
                self.data.lat, self.data.lon = latitude, longitude
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    locker['street'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    street, housenumber, conscriptionnumber
                self.data.public_holiday_open = False
                self.data.description = locker.get('findme')
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Queue every MOL Bubi docking station found in the downloaded document.

        Each ``place`` element becomes one POI in Budapest.  Its ``name``
        attribute is expected to look like ``<ref>-<branch>``.  A failure inside
        one element is logged together with that element and the loop goes on;
        a failure outside the loop is logged and not re-raised.
        """
        # Bound before the try block: the outer handler logs ``soup``, and a
        # failing download used to raise a NameError inside the handler itself.
        soup = None
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            # Same guard as the sibling providers: nothing to do without a document.
            if soup is not None:
                for pla in soup.findAll('place'):
                    try:
                        self.data.name = 'MOL Bubi'
                        self.data.code = 'hububibir'
                        self.data.city = 'Budapest'
                        station_name = pla.get('name')
                        if station_name is not None and station_name != '':
                            name_parts = station_name.split('-')
                            # A name without '-' raises IndexError here and the
                            # element is reported by the handler below.
                            self.data.branch = name_parts[1].strip()
                            self.data.ref = name_parts[0].strip()
                        self.data.nonstop = True
                        # The ``bike_racks`` attribute (capacity) is not imported.
                        self.data.lat, self.data.lon = \
                            check_hu_boundary(pla.get('lat').replace(',', '.'),
                                              pla.get('lng').replace(',', '.'))
                        # NOTE(review): lon is passed before lat here - confirm
                        # against the signature of query_postcode_osm_external.
                        self.data.postcode = query_postcode_osm_external(
                            True, self.session, self.data.lon, self.data.lat, None)
                        self.data.public_holiday_open = True
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(pla)
                        logging.exception('Exception occurred')
        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
            logging.error(soup)
Esempio n. 27
0
 def process(self):
     """Import Magyar Posta service points from the downloaded XML document.

     Every ``post`` element whose ``ServicePointType`` is ``PM``, ``CS`` or
     ``PP`` becomes one row; the rows are turned into a DataFrame with the
     ``POI_COLS`` columns and handed to ``insert_poi_dataframe``.  Elements
     of any other type are logged and skipped.
     """
     # ServicePointType text -> (POI name, POI code)
     service_point_types = {
         'PM': ('Posta', 'hupostapo'),
         'CS': ('Posta csomagautomata', 'hupostacso'),
         'PP': ('PostaPont', 'hupostapp'),
     }

     def stripped_text(element, path):
         """Return the stripped text of ``element.find(path)``, or None when it has no text."""
         value = element.find(path).text
         return value.strip() if value is not None else None

     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for e in root.findall('post'):
         known_type = service_point_types.get(e.find('ServicePointType').text)
         if known_type is None:
             # Previously the row was still appended with the name/code of
             # the preceding element (or a NameError on the first one).
             logging.error('Non existing Posta type.')
             continue
         name, code = known_type
         postcode = e.get('zipCode')
         street_name = stripped_text(e, 'street/name')
         street_type = stripped_text(e, 'street/type')
         if street_name is None:
             street = None
         elif street_type is None:
             street = street_name
         else:
             street = '{} {}'.format(street_name, street_type)
         housenumber = stripped_text(e, 'street/houseNumber')
         if housenumber is not None:
             housenumber = housenumber.lower()
         city = clean_city(e.find('city').text)
         branch = e.find('name').text
         lat, lon = check_hu_boundary(
             e.find('gpsData/WGSLat').text.replace(',', '.'),
             e.find('gpsData/WGSLon').text.replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                postcode)
         # Fields this source does not provide.
         website = original = conscriptionnumber = None
         ref = phone = email = None
         # nonstop flag + 7 opening times + 7 closing times, all unknown.
         no_opening_hours = [None] * 15
         insert_data.append([
             code, postcode, city, name, branch, website, original, street,
             housenumber, conscriptionnumber, ref, phone, email, geom,
         ] + no_opening_hours)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
Esempio n. 28
0
 def process(self):
     """Import Rossmann shops from the ``places`` JavaScript variable of the page.

     The first ``script`` element matching ``var places`` is located, its
     first matching line is cleaned with ``clean_javascript_variable`` and
     decoded as JSON.  Only the first entry of each item's ``addresses``
     list is used.  The rows are turned into a DataFrame with the
     ``POI_COLS`` columns and handed to ``insert_poi_dataframe``.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), None,
                                 self.verify_link)
     insert_data = []
     if soup is not None:
         # Raw string: '\s' in a plain literal is an invalid escape sequence
         # (a warning today, an error in a future Python version).
         pattern = re.compile(r'^\s*var\s*places.*')
         script = soup.find('script', text=pattern)
         m = pattern.match(script.get_text())
         data = clean_javascript_variable(m.group(0), 'places')
         text = json.loads(data)
         # Weekdays in the order the opening / closing columns are emitted.
         week = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                 'saturday', 'sunday')
         for poi_data in text:
             poi_data = poi_data['addresses'][0]
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             name = 'Rossmann'
             code = 'hurossmche'
             city = clean_city(poi_data['city'])
             postcode = poi_data['zip'].strip()
             branch = None
             website = None
             nonstop = False
             opening_times = []
             closing_times = []
             for day in week:
                 day_hours = poi_data['business_hours'][day]
                 if day_hours is not None:
                     day_open, day_close = clean_opening_hours(day_hours)
                 else:
                     day_open, day_close = None, None
                 opening_times.append(day_open)
                 closing_times.append(day_close)
             lat, lon = check_hu_boundary(poi_data['position'][0], poi_data['position'][1])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['address']
             ref = None
             phone = None
             email = None
             # Row layout: 14 descriptive fields, nonstop, 7 opening times
             # (Monday..Sunday), then 7 closing times (Monday..Sunday).
             insert_data.append(
                 [code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber,
                  ref, phone, email, geom, nonstop] + opening_times + closing_times)
         if not insert_data:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
Esempio n. 29
0
 def process(self):
     """Load the OMV result list and insert one POI row per entry.

     The document is obtained through ``save_downloaded_soup`` (called with
     the link, the cache file path and ``POST_DATA``); its text is parsed as
     JSON and every item of ``text['results']`` is turned into one row. The
     rows are put into a DataFrame whose columns are set to ``POI_COLS`` and
     handed to ``insert_poi_dataframe`` together with ``self.session``.

     Nothing is inserted when no document was obtained; a warning is logged
     when the result list yields no rows. Record fields are read with plain
     indexing, so a record lacking an expected key raises instead of being
     skipped.

     Returns:
         None
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     # Identity test instead of ``!= None``: equality can be overridden by
     # the object being compared, identity cannot.
     if soup is None:
         return
     insert_data = []
     text = json.loads(soup.get_text())
     for poi_data in text['results']:
         name = 'OMV'
         code = 'huomvfu'
         postcode = poi_data['postcode'].strip()
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address_l'])
         city = clean_city(poi_data['town_l'])
         branch = None
         website = None
         nonstop = None
         oho, ohc = None, None
         if poi_data['open_hours'] is not None:
             oho, ohc = clean_opening_hours(poi_data['open_hours'])
             if oho == '00:00' and ohc == '24:00':
                 # A full-day range is recorded through the nonstop flag
                 # only; the per-day opening and closing times stay empty.
                 nonstop = True
                 oho, ohc = None, None
         original = poi_data['address_l']
         ref = None
         lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         if 'telnr' in poi_data and poi_data['telnr'] != '':
             phone = clean_phone(poi_data['telnr'])
         else:
             phone = None
         email = None
         # The feed carries a single opening-hours value, so the same
         # opening time fills all seven "open" slots and the same closing
         # time fills all seven "close" slots of the row.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop,
         ] + [oho] * 7 + [ohc] * 7)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
Esempio n. 30
0
 def process(self):
     """Load the Spar store list and insert one POI row per store.

     The document is obtained through ``save_downloaded_soup`` (called with
     the link and the cache file path); its text is parsed as JSON and every
     item of the resulting sequence is turned into one row. The rows are put
     into a DataFrame whose columns are set to ``POI_COLS`` and handed to
     ``insert_poi_dataframe`` together with ``self.session``.

     The POI name and code are chosen from substrings of the store name:
     ``'xpres'`` selects Spar Expressz, ``'INTER'`` selects Interspar, and
     everything else is a plain Spar. No opening hours, phone or email are
     filled in for these rows.

     Nothing is inserted when no document was obtained; a warning is logged
     when the list yields no rows. Record fields are read with plain
     indexing, so a record lacking an expected key raises instead of being
     skipped.

     Returns:
         None
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     # Identity test instead of ``!= None``: equality can be overridden by
     # the object being compared, identity cannot.
     if soup is None:
         return
     insert_data = []
     text = json.loads(soup.get_text())
     for poi_data in text:
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         raw_name = poi_data['name']
         if 'xpres' in raw_name:
             name = 'Spar Expressz'
             code = 'husparexp'
         elif 'INTER' in raw_name:
             name = 'Interspar'
             code = 'husparint'
         else:
             # Names containing 'market' and all remaining names map to
             # the same plain Spar entry, so one branch covers both.
             name = 'Spar'
             code = 'husparsup'
         # Normalise the upper-case brand spelling before the reference and
         # the branch label are extracted. 'INTERSPAR' has to be replaced
         # first, otherwise the 'SPAR' rule would turn it into 'INTERSpar'.
         shop_name = raw_name.replace('INTERSPAR', 'Interspar')
         shop_name = shop_name.replace('SPAR', 'Spar')
         ref_match = PATTERN_SPAR_REF.search(shop_name)
         if ref_match is not None:
             ref = ref_match.group(1).strip()
         else:
             ref = None
         city = clean_city(poi_data['city'])
         postcode = poi_data['zipCode'].strip()
         # The branch label is the part of the name before the first '('.
         branch = shop_name.split('(')[0].strip()
         website = poi_data['pageUrl'].strip()
         nonstop = None
         lat, lon = check_hu_boundary(poi_data['latitude'],
                                      poi_data['longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         phone = None
         email = None
         # The fourteen per-day opening/closing slots are all left empty.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop,
         ] + [None] * 14)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)