Example #1
def save_downloaded_soup(link,
                         file,
                         filetype,
                         post_data=None,
                         verify=config.get_download_verify_link(),
                         headers=None):
    soup = None
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        soup = readfile(file, filetype)
    else:
        if link is not None:
            soup = download_content(link, verify, post_data, headers)
            if soup is not None:
                logging.info('Content received; writing it to file.')
                if not os.path.exists(config.get_directory_cache_url()):
                    os.makedirs(config.get_directory_cache_url())
                with open(file, mode='w', encoding='utf-8') as code:
                    if filetype == FileType.html:
                        soup = BeautifulSoup(soup, 'html.parser')
                        code.write(str(soup.prettify()))
                    elif filetype == FileType.xml:
                        soup = BeautifulSoup(soup,
                                             'lxml',
                                             from_encoding='utf-8')
                        logging.debug('original encoding: %s',
                                      soup.original_encoding)
                        code.write(str(soup.prettify()))
                    elif filetype in (FileType.csv, FileType.json):
                        code.write(str(soup))
                    else:
                        logging.error('Unexpected type to write: %s', filetype)
            else:
                if os.path.exists(file):
                    logging.info(
                        'The %s link returned a non-200 status code, but an already downloaded file exists. Trying to open it.',
                        link)
                    soup = readfile(file, filetype)
                else:
                    logging.warning(
                        'Skipping dataset: %s. There is no downloadable URL and no previously downloaded file.',
                        link)
        else:
            if os.path.exists(file):
                soup = readfile(file, filetype)
                if filetype == FileType.html:
                    soup = BeautifulSoup(soup, 'html.parser')
                elif filetype == FileType.xml:
                    soup = BeautifulSoup(soup, 'lxml')
                logging.info(
                    'Using file only: %s. There is no downloadable URL, only the file. Do not forget to update the file manually!',
                    file)
            else:
                logging.warning(
                    'Cannot use download or cached file: %s. There is no downloadable URL and no previously downloaded file.',
                    file)
    return soup
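A minimal usage sketch for the helper above, assuming the project's config, FileType and save_downloaded_soup objects from this example are importable; the URL and cache file name below are placeholders rather than values from the original code, and a freshly downloaded HTML page is returned as a BeautifulSoup document (cached reads depend on what readfile returns).

import os

# Placeholder link and cache path, for illustration only.
cache_file = os.path.join(config.get_directory_cache_url(), 'example_provider.html')
soup = save_downloaded_soup('https://example.com/branches', cache_file, FileType.html)
if soup is not None:
    # For a fresh HTML download, soup is a BeautifulSoup document and can be queried.
    for row in soup.find_all('tr'):
        print(row.get_text(strip=True))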
Example #2
def save_downloaded_pd(link, file, verify=config.get_download_verify_link()):
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        df = pd.read_csv(file)
    else:
        df = pd.read_csv(link, encoding='UTF-16', sep='\t')
        if df is not None:
            if not os.path.exists(config.get_directory_cache_url()):
                os.makedirs(config.get_directory_cache_url())
            df.to_csv(file)
        else:
            logging.warning('Skipping dataset.')
    return df
Example #3
def save_downloaded_xml(link, file, verify=config.get_download_verify_link()):
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        with open(file, 'rb') as content_file:
            page = content_file.read()
    else:
        page = download_xml(link, verify)
        if page is not None:
            if not os.path.exists(config.get_directory_cache_url()):
                os.makedirs(config.get_directory_cache_url())
            with open(file, mode='wb') as code:
                code.write(page)
        else:
            logging.warning('Skipping dataset.')
    return page
Example #4
def save_downloaded_soup(link,
                         file,
                         post_data=None,
                         verify=config.get_download_verify_link()):
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        with open(file, 'r') as content_file:
            soup = BeautifulSoup(content_file.read(), 'html.parser')
    else:
        soup = download_soup(link, verify, post_data)
        if soup is not None:
            if not os.path.exists(config.get_directory_cache_url()):
                os.makedirs(config.get_directory_cache_url())
            with open(file, mode='w', encoding='utf-8') as code:
                code.write(str(soup))
        else:
            logging.warning('Skipping dataset: %s.', link)
    return soup
Example #5
def save_downloaded_pd(link,
                       file,
                       verify=config.get_download_verify_link(),
                       headers=None):
    df = None
    if config.get_download_use_cached_data() is True and os.path.isfile(file):
        df = pd.read_csv(file)
    else:
        if link is not None:
            csv_content = download_content(link, verify, None, None, 'utf-16')
            if csv_content is not None:
                logging.info('Content received; writing it to file.')
                if not os.path.exists(config.get_directory_cache_url()):
                    os.makedirs(config.get_directory_cache_url())
                with open(file, mode='w', encoding='utf-8') as code:
                    code.write(csv_content)
                df = pd.read_csv(file, encoding='UTF-8', sep='\t', skiprows=0)
            else:
                if os.path.exists(file):
                    logging.info(
                        'The %s link returned a non-200 status code, but an already downloaded file exists. Trying to open it.',
                        link)
                    df = pd.read_csv(file,
                                     encoding='UTF-8',
                                     sep='\t',
                                     skiprows=0)
                else:
                    logging.warning(
                        'Skipping dataset: %s. There is no downloadable URL and no previously downloaded file.',
                        link)
        else:
            if os.path.exists(file):
                df = pd.read_csv(file, encoding='UTF-8', sep='\t', skiprows=0)
                logging.info(
                    'Using file only: %s. There is no downloadable URL, only the file. Do not forget to update the file manually!',
                    file)
            else:
                logging.warning(
                    'Cannot use download or cached file: %s. There is no downloadable URL and no previously downloaded file.',
                    file)
    return df
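A similar hedged sketch for the pandas variant above; the export URL and cache file name are placeholders, and the source is assumed to be a tab-separated UTF-16 export as in the download branch of the function.

import os

# Placeholder link and cache path, for illustration only.
csv_cache = os.path.join(config.get_directory_cache_url(), 'example_dataset.csv')
df = save_downloaded_pd('https://example.com/export.tsv', csv_cache)
if df is not None:
    # Quick sanity check of the cached or freshly downloaded DataFrame.
    print(df.head())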
Example #6
 def constains(self):
     # self.link = 'https://www.magnetbank.hu/kapcsolat/fiokkereso'
     self.link = os.path.join(config.get_directory_cache_url(),
                              'hu_magnet_bank.json')
     self.tags = {
         'brand': 'MagNet Bank',
         'brand:wikidata': 'Q17379757',
         'bic': 'HBWEHUHB',
         'brand:wikipedia': 'hu:MagNet Bank',
         'operator': 'MagNet Magyar Közösségi Bank Zrt.',
         'operator:addr': '1062 Budapest, Andrássy út 98.',
         'contact:fax': '+36 1 428 8889',
         'ref:HU:company': '01 10 046111',
         'ref:vatin': 'HU14413591',
         'ref:vatin:hu': '14413591-4-44',
     }
     self.filetype = FileType.json
     self.filename = '{}.{}'.format(self.__class__.__name__,
                                    self.filetype.name)
Example #7
 def constains(self):
     self.link = os.path.join(config.get_directory_cache_url(),
                              'hu_mobiliti_ev.csv')
     self.tags = {
         'amenity': 'charging_station',
         'authentication:app': 'yes',
         'authentication:none': 'yes',
         'authentication:membership_card': 'yes',
         'operator': 'NKM Mobilitás Kft.',
         'operator:addr': '1081 Budapest, II. János Pál pápa tér 20.',
         'fee': 'yes',
         'parking:fee': 'no',
         'opening_hours': '24/7',
         'ref:vatin': 'HU23443486',
         'ref:vatin:hu': '23443486-2-42',
         'ref:HU:company': '01-09-965868',
         'contact:website': 'https://www.mobiliti.hu/emobilitas',
         'contact:email': '*****@*****.**',
         'contact:phone': '+36 62 565 758',
     }
     self.filetype = FileType.csv
     self.filename = '{}.{}'.format(self.__class__.__name__,
                                    self.filetype.name)
Example #8
 def constains(self):
     self.link = os.path.join(config.get_directory_cache_url(),
                              'hu_mol_plugee_ev.csv')
     self.tags = {
         'amenity': 'charging_station',
         'authentication:app': 'yes',
         'authentication:none': 'yes',
         'brand': 'MOL',
         'operator': 'MOL Nyrt.',
         'operator:addr': '1117 Budapest, Október huszonharmadika utca 18.',
         'fee': 'yes',
         'parking:fee': 'no',
         'opening_hours': '24/7',
         'ref:vatin': 'HU10625790',
         'ref:vatin:hu': '10625790-4-44',
         'ref:HU:company': '01-10-041683',
         'contact:email': '*****@*****.**',
         'contact:phone': '+36 1 998 9888',
         'contact:website': 'https://molplugee.hu/',
         'motorcar': 'yes'
     }
     self.filetype = FileType.csv
     self.filename = '{}.{}'.format(self.__class__.__name__,
                                    self.filetype.name)
Example #9
def import_poi_data_module(module: str):
    """Process all data provider modules enabled in app.conf and write to the database

    Args:
        module (str): Name of module to run
    """
    try:
        db = POIBase('{}://{}:{}@{}:{}/{}'.format(
            config.get_database_type(), config.get_database_writer_username(),
            config.get_database_writer_password(),
            config.get_database_writer_host(),
            config.get_database_writer_port(),
            config.get_database_poi_database()))
        pgsql_pool = db.pool
        session_factory = sessionmaker(pgsql_pool)
        Session = scoped_session(session_factory)
        session = Session()
        module = module.strip()
        logging.info('Processing %s module ...', module)
        if module == 'hu_kh_bank':
            from osm_poi_matchmaker.dataproviders.hu_kh_bank import hu_kh_bank
            work = hu_kh_bank(
                session, config.get_directory_cache_url(), True,
                os.path.join(config.get_directory_cache_url(),
                             'hu_kh_bank.json'), 'K&H Bank')
            insert_type(session, work.types())
            work.process()
            work = hu_kh_bank(
                session, config.get_directory_cache_url(), True,
                os.path.join(config.get_directory_cache_url(),
                             'hu_kh_atm.json'), 'K&H Bank ATM')
            work.process()
        elif module == 'hu_cib_bank':
            from osm_poi_matchmaker.dataproviders.hu_cib_bank import hu_cib_bank
            work = hu_cib_bank(
                session, config.get_directory_cache_url(), True,
                os.path.join(config.get_directory_cache_url(),
                             'hu_cib_bank.json'), 'CIB Bank')
            insert_type(session, work.types())
            work.process()
            work = hu_cib_bank(
                session, config.get_directory_cache_url(), True,
                os.path.join(config.get_directory_cache_url(),
                             'hu_cib_atm.json'), 'CIB Bank ATM')
            work.process()
        elif module == 'hu_posta_json':
            # Old code that uses JSON files
            from osm_poi_matchmaker.dataproviders.hu_posta_json import hu_posta_json
            # We only use csekkautomata since no XML is available from another data source
            work = hu_posta_json(
                session,
                'https://www.posta.hu/szolgaltatasok/posta-srv-postoffice/rest/postoffice/list?searchField=&searchText=&types=csekkautomata',
                config.get_directory_cache_url(), 'hu_postacsekkautomata.json')
            work.process()
        else:
            mo = dataproviders_loader.import_module(
                'dataproviders.{0}'.format(module), module)
            work = mo(session, config.get_directory_cache_url())
            insert_type(session, work.types())
            work.process()
            work.export_list()
    except Exception as err:
        logging.error(err)
        logging.exception('Exception occurred')
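A hedged sketch of how the runner above might be driven for several providers; the module names are taken from the branches of the function purely for illustration, while a real deployment would read the enabled module list from app.conf.

if __name__ == '__main__':
    # Illustrative module names only; the real list comes from app.conf.
    for module_name in ('hu_kh_bank', 'hu_cib_bank', 'hu_posta_json'):
        import_poi_data_module(module_name)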