Beispiel #1
0
    def process(self):
        """Import Yves Rocher shops from the downloaded JSON feed.

        The feed is obtained through ``save_downloaded_soup``. Every entry
        under the ``list`` key whose ``country_id`` equals 3 is copied into
        ``self.data`` and stored with ``self.data.add()``. An error raised
        while handling one entry is logged together with that entry and the
        loop continues; an error outside the loop is logged and ends the run.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            shops = json.loads(str(soup)).get('list')
            for poi_data in shops:
                try:
                    # Entries of any other country are skipped
                    # (presumably 3 is Hungary - confirm against the feed).
                    if poi_data.get('country_id') != 3:
                        continue
                    poi = self.data
                    poi.name = 'Yves Rocher'
                    poi.code = 'huyvesrcos'
                    poi.lat, poi.lon = check_hu_boundary(
                        poi_data.get('latitude'), poi_data.get('longitude'))
                    poi.website = 'https://www.yves-rocher.hu{}/'.format(
                        poi_data.get('request_path'))
                    opening = poi_data.get('hours')
                    # NOTE(review): membership is tested with an int; if
                    # 'hours' is a JSON object its keys are strings - confirm
                    # that this branch is ever taken.
                    for day in range(7):
                        if day not in opening:
                            continue
                        poi.day_open(day, opening[day]['hour_from'])
                        poi.day_close(day, opening[day]['hour_to'])
                    poi.postcode = poi_data.get('zip')
                    address = poi_data.get('address')
                    poi.street, poi.housenumber, poi.conscriptionnumber = \
                        extract_street_housenumber_better_2(address)
                    poi.city = clean_city(poi_data.get('city'))
                    poi.original = poi_data.get('address')
                    phone = poi_data.get('phone')
                    if phone is not None and phone != '':
                        poi.phone = clean_phone_to_str(phone)
                    # A mobile number is appended after ';' when a phone
                    # number is already set, otherwise it is used on its own.
                    mobile = poi_data.get('mobile')
                    if mobile is not None and mobile != '':
                        if poi.phone is not None:
                            poi.phone = '{};{}'.format(
                                poi.phone, clean_phone_to_str(mobile))
                        else:
                            poi.phone = clean_phone_to_str(mobile)
                    poi.public_holiday_open = False
                    poi.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import Penny markets from the downloaded JSON feed.

        Each entry under the ``markets`` key is copied into ``self.data`` and
        stored with ``self.data.add()``. Any exception is logged and ends the
        import; there is no per-entry error handling.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            markets = json.loads(soup)['markets']
            for poi_data in markets:
                poi = self.data
                poi.name = 'Penny'
                poi.code = 'hupennysup'
                address = poi_data['address']
                poi.postcode = address['zip'].strip()
                # Only the part of the street before the first comma is
                # parsed; the untouched value is kept in `original`.
                street_head = address['street'].split(',')[0]
                poi.city = clean_city(address['city'])
                poi.original = address['street']
                poi.lat, poi.lon = check_hu_boundary(
                    address['latitude'], address['longitude'])
                poi.street, poi.housenumber, poi.conscriptionnumber = \
                    extract_street_housenumber_better_2(street_head.title())
                if poi_data.get('phone', '') != '':
                    poi.phone = clean_phone_to_str(poi_data['phone'])
                if poi_data.get('id', '') != '':
                    poi.ref = poi_data['id'].strip()
                poi.public_holiday_open = False
                # TODO: Parsing opening_hours from datasource
                poi.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
    def process(self):
        """Import stores (code ``hutommacon``) from the downloaded JSON feed.

        Each entry under the ``stores`` key is copied into ``self.data`` and
        stored with ``self.data.add()``. When an entry has no ``settlement``
        the city is looked up from the coordinates with
        ``query_osm_city_name_gpd``. An error raised while handling one entry
        is logged together with that entry and the loop continues; an error
        outside the loop is logged and ends the import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(str(soup))
                for poi_data in text['stores']:
                    try:
                        # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                        # conscriptionnumber, ref, geom
                        self.data.code = 'hutommacon'
                        # NOTE(review): element 2 of 'name' is used as the
                        # reference - confirm the layout of 'name' in the feed.
                        ref = poi_data.get('name')[2]
                        if ref is not None and ref != '':
                            self.data.ref = ref
                        website = poi_data.get('website')
                        if website is not None and website != '':
                            self.data.website = website
                        else:
                            self.data.website = 'https://tommarket.hu'
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('long'))
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                poi_data.get('address'))
                        postcode = poi_data.get('zip')
                        if postcode is not None and postcode != '':
                            self.data.postcode = postcode
                        self.data.original = poi_data.get('address')
                        settlement = poi_data.get('settlement')
                        if settlement is not None and settlement != '':
                            self.data.city = clean_city(settlement)
                        else:
                            self.data.city = query_osm_city_name_gpd(
                                self.session, self.data.lat, self.data.lon)
                        phone = poi_data.get('phone')
                        if phone is not None and phone != '':
                            self.data.phone = clean_phone_to_str(phone)
                        email = poi_data.get('email')
                        if email is not None and email != '':
                            # The address belongs in `email`; writing it to
                            # `phone` would overwrite the phone number above.
                            self.data.email = email.strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import OIL! fuel stations from the downloaded JSON feed.

        The feed is a JSON array; each element is copied into ``self.data``
        and stored with ``self.data.add()``. An error raised while handling
        one element is logged together with that element and the loop
        continues; an error outside the loop is logged and ends the import.
        """

        def filled(value):
            # True for any value that is neither None nor the empty string.
            return value is not None and value != ''

        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(soup):
                try:
                    poi = self.data
                    poi.name = 'OIL!'
                    poi.code = 'huoilfu'
                    postcode = poi_data.get('zip')
                    if filled(postcode):
                        poi.postcode = postcode.strip()
                    city = poi_data.get('city')
                    if filled(city):
                        poi.city = clean_city(city)
                    poi.lat, poi.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    address = poi_data.get('address')
                    if filled(address):
                        poi.original = address
                        poi.street, poi.housenumber, poi.conscriptionnumber = \
                            extract_street_housenumber_better_2(address)
                    phone = poi_data.get('phone')
                    if filled(phone):
                        poi.phone = clean_phone_to_str(phone)
                    # Both fuel flags are set unconditionally for every entry.
                    poi.fuel_octane_95 = True
                    poi.fuel_diesel = True
                    station_id = poi_data.get('id')
                    if filled(station_id):
                        poi.ref = station_id.strip()
                    url = poi_data.get('url')
                    if filled(url):
                        poi.website = url.strip()
                    else:
                        poi.website = 'https://www.oil-benzinkutak.hu'
                    store = poi_data.get('store')
                    if filled(store):
                        # The branch is whatever follows the first space of
                        # 'store'; a value without a space raises IndexError,
                        # which the handler below logs.
                        poi.branch = store.split(' ', 1)[1].strip().capitalize()
                    poi.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import CBA and Príma shops from the downloaded page.

        The shop list is the JavaScript variable ``boltok_nyers`` extracted
        from the page and parsed as JSON. Each element is copied into
        ``self.data`` and stored with ``self.data.add()``. Any exception is
        logged and ends the import; there is no per-entry error handling.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            shops = json.loads(
                extract_javascript_variable(soup, 'boltok_nyers'))
            for poi_data in shops:
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                poi = self.data
                poi.city = clean_city(poi_data.get('A_VAROS'))
                poi.postcode = poi_data.get('A_IRSZ').strip()
                poi.branch = poi_data.get('P_NAME').strip()
                # Shops whose branch name contains 'Príma' get the Príma
                # name and code, all others the CBA ones.
                if 'Príma' in poi.branch:
                    poi.name = 'Príma'
                    poi.code = 'huprimacon'
                else:
                    poi.name = 'CBA'
                    poi.code = 'hucbacon'
                for day in range(7):
                    # Feed keys are numbered 1..7 while day indexes run 0..6.
                    opens = poi_data.get('PS_OPEN_FROM_{}'.format(day + 1))
                    if opens is not None:
                        opens = clean_opening_hours_2(opens)
                    poi.day_open(day, opens)
                    closes = poi_data.get('PS_OPEN_TO_{}'.format(day + 1))
                    if closes is not None:
                        closes = clean_opening_hours_2(closes)
                    poi.day_close(day, closes)
                poi.original = poi_data.get('A_CIM')
                poi.lat, poi.lon = check_hu_boundary(
                    poi_data.get('PS_GPS_COORDS_LAT'),
                    poi_data.get('PS_GPS_COORDS_LNG'))
                poi.street, poi.housenumber, poi.conscriptionnumber = \
                    extract_street_housenumber_better_2(poi_data.get('A_CIM'))
                has_phone = poi_data.get('PS_PUBLIC_TEL', '') != ''
                poi.phone = clean_phone_to_str(
                    poi_data.get('PS_PUBLIC_TEL')) if has_phone else None
                has_email = poi_data.get('PS_PUBLIC_EMAIL', '') != ''
                poi.email = poi_data.get(
                    'PS_PUBLIC_EMAIL') if has_email else None
                poi.public_holiday_open = False
                poi.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
Beispiel #6
0
    def process(self):
        """Import Pepco shops from the downloaded JSON feed.

        Each entry under the ``data`` key that passes the city filter is
        copied into ``self.data`` and stored with ``self.data.add()``. Any
        exception is logged and ends the import; there is no per-entry error
        handling.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['data']:
                    # The Pepco dataset contains all European data. Since the
                    # program cannot handle POIs outside Hungary (so far),
                    # only Hungarian POIs are kept. This depends on the OSM
                    # extract in use, which is currently the Hungarian one:
                    # an entry is skipped when it has no city, an empty city,
                    # or a city that query_osm_city_name cannot find.
                    if 'city' not in poi_data:
                        continue
                    city = poi_data['city']
                    if city == '' or query_osm_city_name(
                            self.session, city) is None:
                        continue
                    self.data.city = clean_city(city)
                    self.data.name = 'Pepco'
                    self.data.code = 'hupepcoclo'
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(
                            poi_data['coordinates']['lat'], poi_data['coordinates']['lng'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('streetAddress'))
                    self.data.original = poi_data.get('streetAddress')
                    self.data.postcode = poi_data.get('postalCode')
                    # Assign opening_hours
                    # NOTE(review): membership is tested with an int; if
                    # 'openingHours' is a JSON object its keys are strings -
                    # confirm that this branch is ever taken.
                    opening = poi_data['openingHours']
                    for i in range(0, 7):
                        if i in opening:
                            self.data.day_open(i, opening[i]['from'])
                            self.data.day_close(i, opening[i]['to'])
                    # Assign additional information
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('phoneNumber'))
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            # Log the exception once; nesting logging.error() calls would
            # emit a second error record containing only "None".
            logging.error(e)
    def process(self):
        """Import Mobil Petrol stations from the downloaded page.

        The station list is the JavaScript variable ``totem_stations``
        extracted from the page and parsed as JSON; its values are copied
        into ``self.data`` and stored with ``self.data.add()``. Opening hours
        are derived from markers found in the station's ``services`` value.
        Any exception is logged and ends the import.
        """
        # Opening-hour markers looked for in 'services', in priority order,
        # with the daily opening and closing time each one stands for.
        hour_markers = (
            ('6-22', '06:00', '22:00'),
            ('6-21', '06:00', '21:00'),
            ('5-22', '05:00', '22:00'),
            ('6-18', '06:00', '18:00'),
        )
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                text = json.loads(
                    extract_javascript_variable(soup, 'totem_stations'))
                for poi_data in text.values():
                    self.data.name = 'Mobil Petrol'
                    self.data.code = 'humobpefu'
                    self.data.website = poi_data.get('description')
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.original = poi_data.get('address')
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                                     poi_data['location']['lng'])
                    self.data.postcode = None
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get('address'))
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    self.data.public_holiday_open = False
                    # A missing 'services' value is treated as "no markers"
                    # instead of aborting the whole import with a TypeError.
                    services = poi_data.get('services') or ''
                    if '0-24' in services:
                        self.data.nonstop = True
                        self.data.public_holiday_open = True
                    else:
                        # The hours are resolved afresh for every station so
                        # that a station without a known marker does not
                        # inherit the hours of the previous station.
                        hours = next(
                            ((open_from, open_to)
                             for marker, open_from, open_to in hour_markers
                             if marker in services),
                            None)
                        if hours is not None:
                            open_from, open_to = hours
                            for i in range(0, 7):
                                self.data.day_open(i, open_from)
                                self.data.day_close(i, open_to)
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
    def process(self):
        """Import K&H branches or ATMs from the local JSON file ``self.link``.

        ``self.name`` selects the variant: 'K&H Bank' or 'K&H Bank ATM'. Each
        element of the file's ``results`` list is a mapping whose first key
        holds the record details. The records are collected in a new
        ``POIDataset`` and, unless it is empty, written with
        ``insert_poi_dataframe``. Any exception is logged together with the
        record being handled (``None`` if no record had been read yet).
        """
        # Bound up front so the error handler can log it even when the
        # failure happens before the first record is read; otherwise the
        # handler itself would raise NameError.
        poi_data = None
        try:
            if self.link:
                with open(self.link, 'r') as f:
                    text = json.load(f)
                data = POIDataset()
                for poi_data in text['results']:
                    first_element = next(iter(poi_data))
                    if self.name == 'K&H Bank':
                        data.name = 'K&H Bank'
                        data.code = 'hukhbank'
                        data.public_holiday_open = False
                    elif self.name == 'K&H Bank ATM':
                        data.name = 'K&H Bank ATM'
                        data.code = 'hukhatm'
                        data.public_holiday_open = True
                    # Only ATMs are marked as open around the clock.
                    data.nonstop = data.code == 'hukhatm'
                    details = poi_data.get(first_element)
                    data.lat, data.lon = check_hu_boundary(
                        details['latitude'], details['longitude'])
                    address = details['address']
                    if address is not None and address != '':
                        data.postcode, data.city, data.street, data.housenumber, data.conscriptionnumber = \
                            extract_all_address(address)
                        data.original = address
                    # NOTE(review): the phone number is read from the record
                    # itself, not from `details` like the other fields -
                    # confirm against the file layout.
                    phone = poi_data.get('phoneNumber')
                    if phone is not None and phone != '':
                        data.phone = clean_phone_to_str(phone)
                    else:
                        data.phone = None
                    data.add()
                if data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
            logging.error(poi_data)
Beispiel #9
0
    def process(self):
        """Import CIB branches or ATMs from the local JSON file ``self.link``.

        ``self.name`` selects the variant: 'CIB Bank' gives branches, any
        other value gives ATMs. Only elements of ``availableLocations`` whose
        ``locationStatus`` is 'IN_SERVICE' are used. The records are collected
        in a new ``POIDataset`` and, unless it is empty, written with
        ``insert_poi_dataframe``. Any exception is logged together with the
        record being handled (``None`` if no record had been read yet).
        """
        # Bound up front so the error handler can log it even when the
        # failure happens before the first record is read; otherwise the
        # handler itself would raise NameError.
        poi_data = None
        try:
            if self.link:
                with open(self.link, 'r') as f:
                    text = json.load(f)
                data = POIDataset()
                for poi_data in text['availableLocations']:
                    if poi_data.get('locationStatus') != 'IN_SERVICE':
                        continue
                    if self.name == 'CIB Bank':
                        data.name = 'CIB Bank'
                        data.code = 'hucibbank'
                        data.public_holiday_open = False
                    else:
                        data.name = 'CIB Bank ATM'
                        data.code = 'hucibatm'
                        data.public_holiday_open = True
                    data.lat, data.lon = check_hu_boundary(
                        poi_data['location']['lat'],
                        poi_data['location']['lon'])
                    data.city = clean_city(poi_data['city'])
                    data.postcode = poi_data.get('zip').strip()
                    data.housenumber = poi_data['streetNo'].strip()
                    data.street = poi_data['streetName'].strip()
                    data.branch = poi_data['name']
                    if 'phone' in poi_data and poi_data['phone'] != '':
                        data.phone = clean_phone_to_str(poi_data['phone'])
                    if 'email' in poi_data and poi_data['email'] != '':
                        data.email = poi_data['email'].strip()
                    data.original = poi_data['fullAddress']
                    data.add()
                if data.lenght() < 1:
                    logging.warning('Resultset is empty. Skipping ...')
                else:
                    insert_poi_dataframe(self.session, data.process())
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
            logging.error(poi_data)
    def process(self):
        """Import Benu pharmacies from the downloaded JSON feed.

        The feed is a JSON array; each element is copied into ``self.data``
        and stored with ``self.data.add()``. An error raised while handling
        one element is logged together with that element and the loop
        continues; an error outside the loop is logged and ends the import.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(str(soup)):
                try:
                    poi = self.data
                    # Titles containing the chain name become the branch of
                    # a generically named POI; any other title is used as
                    # the POI name itself.
                    if 'BENU Gyógyszertár' in poi_data.get('title'):
                        poi.name = 'Benu gyógyszertár'
                        poi.branch = poi_data.get('title').strip()
                    else:
                        poi.name = poi_data.get('title').strip()
                        poi.branch = None
                    poi.code = 'hubenupha'
                    # The website is whatever PATTERN_FULL_URL matches at the
                    # start of the description, if anything.
                    website = None
                    description = poi_data.get('description')
                    if description is not None:
                        found = PATTERN_FULL_URL.match(description)
                        if found is not None:
                            website = found.group(0).strip()
                    poi.website = website
                    poi.city = clean_city(poi_data.get('city'))
                    poi.postcode = poi_data.get('postal_code').strip()
                    poi.lat, poi.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    poi.street, poi.housenumber, poi.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('street'))
                    poi.original = poi_data.get('street')
                    has_phone = poi_data.get('phone', '') != ''
                    poi.phone = clean_phone_to_str(
                        poi_data.get('phone')) if has_phone else None
                    poi.public_holiday_open = False
                    poi.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import OBI stores from the downloaded JSON feed.

        Each entry under the ``stores`` key is copied into ``self.data`` and
        stored with ``self.data.add()``. Any exception is logged and ends the
        import; there is no per-entry error handling.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            stores = json.loads(soup).get('stores')
            for poi_data in stores:
                poi = self.data
                poi.name = 'OBI'
                poi.code = 'huobidiy'
                address = poi_data['address']
                poi.postcode = address['zip'].strip()
                poi.city = clean_city(address['city'])
                poi.original = address['street']
                poi.lat, poi.lon = check_hu_boundary(
                    address['lat'], address['lon'])
                poi.street, poi.housenumber, poi.conscriptionnumber = \
                    extract_street_housenumber_better_2(address['street'])
                # Optional fields are copied only when the key is present
                # and its value is not the empty string.
                if poi_data.get('phone', '') != '':
                    poi.phone = clean_phone_to_str(poi_data.get('phone'))
                if poi_data.get('storeNumber', '') != '':
                    poi.ref = poi_data.get('storeNumber').strip()
                if poi_data.get('email', '') != '':
                    poi.email = clean_email(poi_data.get('email'))
                if poi_data.get('path', '') != '':
                    poi.website = poi_data.get('path')
                # TODO: opening hour parser for poi_data.get('hours'), format is like:
                #  Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                poi.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
 def test_clean_phone(self):
     """Check clean_phone_to_str against every fixture in self.phones.

     Each fixture is a mapping with the raw input under 'original' and the
     expected result under 'converted'.
     """
     for case in self.phones:
         original, expected = case['original'], case['converted']
         # The conversion runs inside the subtest and the input is attached
         # to it, so a failing or raising fixture is reported with its own
         # input and does not stop the remaining fixtures from being checked.
         with self.subTest(original=original):
             self.assertEqual(expected, clean_phone_to_str(original))
    def process(self):
        """Import Tesco (and S-Market) stores from the downloaded JSON feed.

        Each entry under the ``stores`` key is copied into ``self.data`` and
        stored with ``self.data.add()``. The POI name and code depend on the
        entry's ``name`` and on the city looked up from its coordinates. An
        error raised while handling one entry is logged together with that
        entry and the loop continues; an error outside the loop is logged and
        ends the import.
        """
        try:
            source_url = '{}'.format(self.link)
            cache_path = os.path.join(self.download_cache, self.filename)
            soup = save_downloaded_soup(source_url, cache_path, self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(str(soup)).get('stores'):
                try:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                    # conscriptionnumber, ref, geom
                    poi = self.data
                    poi.branch = poi_data.get('store_name')
                    poi.ref = poi_data.get('goldid')
                    poi.website = 'https://tesco.hu/aruhazak/aruhaz/{}/'.format(
                        poi_data.get('urlname'))
                    opening = json.loads(poi_data.get('opening'))
                    for day in range(7):
                        # Day indexes 0..5 use feed keys '1'..'6'; index 6
                        # uses feed key '0'.
                        key = '0' if day == 6 else str(day + 1)
                        if key in opening:
                            hours = opening[key]
                            poi.day_open(day, hours[0])
                            poi.day_close(day, hours[1])
                    poi.lat, poi.lon = check_hu_boundary(
                        poi_data.get('gpslat'), poi_data.get('gpslng'))
                    poi.street, poi.housenumber, poi.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('address'))
                    poi.postcode = poi_data.get('zipcode').strip()
                    poi.city = clean_city(
                        query_osm_city_name_gpd(
                            self.session, poi.lat, poi.lon))
                    store_kind = poi_data.get('name')
                    if 'xpres' in store_kind:
                        # In the listed cities the express stores are
                        # imported as S-Market.
                        if poi.city in ('Győr', 'Sopron', 'Mosonmagyaróvár',
                                        'Levél'):
                            poi.name, poi.code = 'S-Market', 'husmrktexp'
                        else:
                            poi.name, poi.code = 'Tesco Expressz', 'hutescoexp'
                    elif 'xtra' in store_kind:
                        poi.name, poi.code = 'Tesco Extra', 'hutescoext'
                    elif poi.city in ('Levél',):
                        poi.name, poi.code = 'S-Market', 'husmrktsup'
                    else:
                        poi.name, poi.code = 'Tesco', 'hutescosup'
                    poi.original = poi_data.get('address')
                    phone = poi_data.get('phone')
                    if phone is not None and phone != '':
                        poi.phone = clean_phone_to_str(phone)
                    # A non-empty goldid replaces the raw value stored above
                    # with its stripped form.
                    gold_id = poi_data.get('goldid')
                    if gold_id is not None and gold_id != '':
                        poi.ref = gold_id.strip()
                    poi.public_holiday_open = False
                    poi.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Import the Hungarian dm stores from the downloaded JSON store list.

        The feed is fetched with ``save_downloaded_soup`` and parsed as JSON.
        Every entry of its ``stores`` list whose ``localeCountry`` is ``HU`` is
        copied into ``self.data`` (address, website, coordinates, phone, store
        number, opening hours) and stored with ``self.data.add()``.

        Nothing is returned and no exception escapes: a failing store is
        logged and skipped, a failing download or parse is logged and ends
        the import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['stores']:
                    try:
                        # Only entries marked with the HU locale are imported
                        if poi_data.get('localeCountry').strip().upper() != 'HU':
                            continue
                        self.data.name = 'dm'
                        self.data.code = 'hudmche'
                        address = poi_data.get('address')
                        self.data.postcode = address['zip'].strip()
                        # Only the part before the first comma is parsed as street
                        street_tmp = address['street'].split(',')[0]
                        self.data.city = clean_city(address['city'])
                        self.data.website = 'https://www.dm.hu{}'.format(
                            poi_data.get('storeUrlPath'))
                        self.data.original = address['street']
                        location = poi_data.get('location')
                        self.data.lat, self.data.lon = check_hu_boundary(
                            location['lat'], location['lon'])
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                street_tmp.title())
                        phone = poi_data.get('phone')
                        if phone is not None and phone != '':
                            self.data.phone = clean_phone_to_str(phone)
                        store_number = poi_data.get('storeNumber')
                        if store_number is not None and store_number != '':
                            self.data.ref = store_number.strip()
                        opening = poi_data.get('openingDays')
                        try:
                            for day_data in opening:
                                week_day = day_data.get('weekDay')
                                # The feed numbers week days 1-7; day_open and
                                # day_close are called with week_day - 1
                                if week_day is not None and 1 <= week_day <= 7:
                                    # Only the first time slice of a day is used
                                    first_slice = day_data.get('timeSlices')[0]
                                    self.data.day_open(
                                        week_day - 1,
                                        first_slice.get('opening'))
                                    self.data.day_close(
                                        week_day - 1,
                                        first_slice.get('closing'))
                        except (IndexError, KeyError, TypeError, AttributeError):
                            # Opening hours are best effort. A missing
                            # 'openingDays' or 'timeSlices' value (None) raises
                            # TypeError/AttributeError, which must not drop the
                            # whole store.
                            logging.warning(
                                'Exception occurred during opening hours processing'
                            )
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
Beispiel #15
0
    def process(self):
        """Import the Magyar Posta service points from the downloaded feed.

        Every ``post`` element is mapped onto ``self.data``. The service point
        type (``PM`` -> 'Posta', ``CS`` -> 'Posta csomagautomata', ``PP`` ->
        'PostaPont') selects name and code. Address, opening hours,
        coordinates and contact details are then filled in and the POI is
        stored with ``self.data.add()``. Entries whose name contains
        'okmányiroda' or 'mol kirendeltség' are skipped.

        Nothing is returned and no exception escapes: a failing entry is
        logged and skipped, a failure outside the loop is logged and ends
        the import.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            for post in soup.findAll('post'):
                try:
                    #  The 'kirendeltség' post offices are not available to end users, so we remove them
                    post_name = post.find('name').get_text().lower()
                    if 'okmányiroda' in post_name or 'mol kirendeltség' in post_name:
                        logging.debug('Skipping non public post office.')
                        continue
                    service_type = post.servicepointtype.get_text()
                    if service_type == 'PM':
                        self.data.name = 'Posta'
                        self.data.code = 'hupostapo'
                        self.data.public_holiday_open = False
                    elif service_type == 'CS':
                        self.data.name = 'Posta csomagautomata'
                        self.data.code = 'hupostacso'
                        self.data.public_holiday_open = True
                    elif service_type == 'PP':
                        self.data.name = 'PostaPont'
                        self.data.code = 'hupostapp'
                        self.data.public_holiday_open = False
                    else:
                        # NOTE(review): processing continues with whatever name/code
                        # self.data currently holds - confirm this is intended
                        logging.error('Non existing Posta type.')
                    self.data.postcode = post.get('zipcode')
                    # Drop anything after an opening parenthesis in the house number
                    self.data.housenumber = post.street.housenumber.get_text().split('(', 1)[0].strip() \
                        if post.street.housenumber is not None else None
                    self.data.conscriptionnumber = None
                    self.data.city = clean_city(post.city.get_text())
                    self.data.branch = post.find('name').get_text() \
                        if post.find('name') is not None else None
                    if self.data.code == 'hupostapo':
                        # Turn the office number in the branch name into '<number>. számú'
                        self.data.branch = re.sub(
                            r"(\d{1,3})", r"\1. számú", self.data.branch)
                    nonstop_num = 0
                    for d in post.findAll('days'):
                        if len(d) == 0:
                            continue
                        # Match the day name of the data source (day tag) with one of
                        # the WeekDaysLongHU enum elements
                        day_name = d.day.get_text()
                        day_key = next(
                            (rd.value for rd in WeekDaysLongHU if rd.name == day_name), None)
                        # Skip days that do not exist at the data provider
                        if day_key is None:
                            logging.warning('Cannot find any opening hours information for day {}.'.
                                            format(day_name))
                            continue
                        # Extract from and to information
                        from1 = d.from1.get_text() if d.from1 is not None else None
                        to1 = d.to1.get_text() if d.to1 is not None else None
                        from2 = d.from2.get_text() if d.from2 is not None else None
                        to2 = d.to2.get_text() if d.to2 is not None else None
                        # Avoid duplicated values of opening and close
                        if from1 != from2 and to1 != to2:
                            logging.debug('Opening hours in post office: %s: %s-%s; %s-%s.',
                                          self.data.branch, from1, to1, from2, to2)
                            self.data.day_open(day_key, from1)
                            if from2 is None or to2 is None:
                                # Single interval: the day closes at the end of the
                                # first interval
                                closing = to1
                            else:
                                # Two intervals: the gap between them is the lunch break.
                                # Only the same lunch break is supported for every day;
                                # it is taken from day key 3 (Wednesday per the original
                                # author's note)
                                if day_key == 3:
                                    self.data.lunch_break_start = to1
                                    self.data.lunch_break_stop = from2
                                closing = to2
                            self.data.day_close(day_key, closing)
                            # Count opening hours with nonstop like settings
                            if from1 == '0:00' and closing in ['0:00', '23:59', '24:00']:
                                nonstop_num += 1
                        else:
                            # It seems there are duplications in Posta data source;
                            # such a day is reported and gets no opening hours
                            logging.warning('Dulicated opening hours in post office: %s: %s-%s; %s-%s.',
                                            self.data.branch, from1, to1, from2, to2)
                    # All times are open so it is non stop
                    if nonstop_num >= 7:
                        logging.debug('It is a non stop post office.')
                        self.data.nonstop = True
                    # Coordinates use a decimal comma in the feed
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(post.gpsdata.wgslat.get_text().replace(',', '.'),
                                          post.gpsdata.wgslon.get_text().replace(',', '.'))
                    # Get street name and type
                    street_tmp_1 = clean_street(post.street.find('name').get_text().strip()) \
                        if post.street.find('name') is not None else None
                    street_tmp_2 = clean_street_type(post.street.type.get_text().strip()) \
                        if post.street.type is not None else None
                    # Streets without types
                    if street_tmp_2 is None:
                        self.data.street = street_tmp_1
                        # Since there is no original address format we create one
                        if self.data.housenumber is not None:
                            self.data.original = '{} {}'.format(
                                street_tmp_1, self.data.housenumber)
                        else:
                            self.data.original = '{}'.format(street_tmp_1)
                    # Street with types
                    elif street_tmp_1 is not None and street_tmp_2 is not None:
                        self.data.street = '{} {}'.format(
                            street_tmp_1, street_tmp_2)
                        # Since there is no original address format we create one
                        if self.data.housenumber is not None:
                            self.data.original = '{} {} {}'.format(street_tmp_1, street_tmp_2,
                                                                   self.data.housenumber)
                        else:
                            self.data.original = '{} {}'.format(
                                street_tmp_1, street_tmp_2)
                    else:
                        logging.error(
                            'Non handled state in street data processing!')
                    self.data.phone = clean_phone_to_str(post.phonearea.get_text()) \
                        if post.phonearea is not None else None
                    self.data.email = post.email.get_text().strip() if post.email is not None else None
                    self.data.add()
                except Exception as err:
                    logging.error(err)
                    logging.error(post)
                    logging.exception('Exception occurred')

        except Exception as err:
            logging.exception('Exception occurred')

            logging.error(err)
Beispiel #16
0
    def process(self):
        """Import the Avia fuel stations from the downloaded station page.

        The station list is read from the ``markers`` JavaScript variable of
        the downloaded page and parsed as JSON. Each entry is copied into
        ``self.data`` (reference, coordinates, address, contact data, fuel
        types and services) and stored with ``self.data.add()``.

        Nothing is returned and no exception escapes: a failing station is
        logged and skipped so the remaining stations are still imported, a
        failing download or parse is logged and ends the import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                # Extract the `markers` JavaScript variable from the downloaded
                # page and parse it as JSON
                text = json.loads(
                    extract_javascript_variable(soup, 'markers', True),
                    strict=False)
                for poi_data in text:
                    # Isolate failures per station, like the sibling importers do
                    try:
                        self.data.name = 'Avia'
                        self.data.code = 'huaviafu'
                        if self.data.city is None:
                            self.data.city = poi_data['title']
                        station_id = poi_data['kutid']
                        has_station_id = station_id is not None and station_id != ''
                        self.data.ref = station_id if has_station_id else None
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data['lat'], poi_data['lng'])
                        address = poi_data['cim']
                        if address is not None and address != '':
                            self.data.postcode, self.data.city, self.data.street, self.data.housenumber, \
                                self.data.conscriptionnumber = extract_all_address(address)
                        self.data.website = '/toltoallomas/?id={}'.format(str(station_id)) \
                            if has_station_id else None
                        self.data.original = address
                        if 'tel' in poi_data and poi_data['tel'] != '':
                            self.data.phone = clean_phone_to_str(poi_data['tel'])
                        else:
                            self.data.phone = None
                        if 'email' in poi_data and poi_data['email'] != '':
                            self.data.email = clean_email(poi_data['email'])
                        else:
                            self.data.email = None
                        self.data.public_holiday_open = False
                        # A feature is set when its value in the feed is the string '1'
                        self.data.fuel_octane_95 = \
                            poi_data.get('b95') == '1' or poi_data.get('b95g') == '1'
                        self.data.fuel_diesel = \
                            poi_data.get('dies') == '1' or poi_data.get('gdies') == '1'
                        self.data.fuel_octane_98 = poi_data.get('b98') == '1'
                        self.data.fuel_lpg = poi_data.get('lpg') == '1'
                        self.data.fuel_e85 = poi_data.get('e85') == '1'
                        self.data.rent_lpg_bottles = poi_data.get('pgaz') == '1'
                        self.data.compressed_air = poi_data.get('komp') == '1'
                        self.data.restaurant = poi_data.get('etterem') == '1'
                        self.data.food = poi_data.get('bufe') == '1'
                        self.data.truck = poi_data.get('kpark') == '1'
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)