def test_exception(self, hdx_config_yaml, project_config_yaml):
    """Exceptions must surface through facade: a ValueError raised by the
    wrapped function propagates, and calling facade with no user agent
    raises UserAgentError."""
    UserAgent.clear_global()
    testresult.actual_result = None
    # my_excfn raises ValueError; facade must not swallow it.
    with pytest.raises(ValueError):
        facade(
            my_excfn,
            user_agent='test',
            hdx_config_yaml=hdx_config_yaml,
            project_config_yaml=project_config_yaml,
        )
    UserAgent.clear_global()
    # No user_agent argument (and no global) -> UserAgentError.
    with pytest.raises(UserAgentError):
        facade(
            my_testuafn,
            hdx_config_yaml=hdx_config_yaml,
            project_config_yaml=project_config_yaml,
        )
    UserAgent.clear_global()
def test_exception(self, hdx_config_yaml, project_config_yaml):
    """Check that facade re-raises errors from the wrapped function and
    rejects a call that supplies no user agent."""
    config_kwargs = dict(hdx_config_yaml=hdx_config_yaml,
                         project_config_yaml=project_config_yaml)
    UserAgent.clear_global()
    testresult.actual_result = None
    with pytest.raises(ValueError):
        facade(my_excfn, user_agent='test', **config_kwargs)
    UserAgent.clear_global()
    with pytest.raises(UserAgentError):
        facade(my_testuafn, **config_kwargs)
    UserAgent.clear_global()
) parser.add_argument("--old-url-pattern", default="scraperwiki.com", help="String present in old URLs") parser.add_argument("--new-url-pattern", default="scraperwiki-snapshot/datasets", help="String present in new URLs") parser.add_argument("-l", "--log", help="Log file", default=None) parser.add_argument("-d", "--debug", action='store_true', help="Log debug messages.") parser.add_argument("-v", "--verbose", action='store_true', help="Increase verbosity.") config = parser.parse_args() log_level = logging.WARNING if config.verbose: log_level = min(log_level, logging.INFO) if config.debug: log_level = min(log_level, logging.DEBUG) if config.log is not None: logging.basicConfig(filename=config.log, level=log_level) facade(main, hdx_site=config.hdx_site, user_agent_config_yaml=join(expanduser('~'), '.dscheckuseragent.yml'))
# -*- coding: utf-8 -*-
"""Calls a function that generates a dataset and creates it in HDX."""
import logging

from hdx.facades.simple import facade

from .my_code import generate_dataset

logger = logging.getLogger(__name__)


def main():
    """Generate the dataset and create it in HDX."""
    dataset = generate_dataset()
    dataset.create_in_hdx()


if __name__ == '__main__':
    facade(main, hdx_site='test')
logger.info("Number of countries: %d" % len(countriesdata)) for info, country in progress_storing_tempdir("UNHCR_population", countries, "iso3"): folder = info["folder"] dataset, showcase = generate_dataset_and_showcase( folder, country, countriesdata[country["iso3"]], headers, resources, fields) if dataset: dataset.update_from_yaml() dataset["notes"] = dataset["notes"].replace( "\n", " \n") # ensure markdown has line breaks dataset.generate_resource_view(1) dataset.create_in_hdx( remove_additional_resources=True, hxl_update=False, updated_by_script="HDX Scraper: UNHCR population", batch=info["batch"], ) showcase.create_in_hdx() showcase.add_dataset(dataset) if __name__ == "__main__": facade( main, user_agent_config_yaml=join(expanduser("~"), ".useragents.yml"), user_agent_lookup=lookup, project_config_yaml=join("config", "project_configuration.yml"), )
def test_facade(self, monkeypatch, hdx_config_yaml, project_config_yaml):
    """Exercise facade configuration precedence: explicit keyword arguments
    versus the USER_AGENT / PREPREFIX / HDX_KEY / HDX_SITE / HDX_URL
    environment variables (the asserts below pin which one wins)."""
    cfg = dict(hdx_config_yaml=hdx_config_yaml,
               project_config_yaml=project_config_yaml)
    UserAgent.clear_global()
    my_user_agent = 'test'
    testresult.actual_result = None
    facade(my_testfn, user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == 'https://data.humdata.org/'
    UserAgent.clear_global()
    version = get_api_version()
    testresult.actual_result = None
    facade(my_testuafn, user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_user_agent = 'lala'
    monkeypatch.setenv('USER_AGENT', my_user_agent)
    facade(my_testuafn, **cfg)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    # USER_AGENT env var overrides the explicit user_agent argument.
    facade(my_testuafn, user_agent='test', **cfg)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_preprefix = 'haha'
    monkeypatch.setenv('PREPREFIX', my_preprefix)
    facade(my_testuafn, user_agent='test', **cfg)
    assert testresult.actual_result == '%s:HDXPythonLibrary/%s-%s' % (my_preprefix, version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_test_key = '1234'
    facade(my_testkeyfn, hdx_key=my_test_key, user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == my_test_key
    UserAgent.clear_global()
    testresult.actual_result = None
    # HDX_KEY env var overrides the explicit hdx_key argument.
    monkeypatch.setenv('HDX_KEY', my_test_key)
    facade(my_testkeyfn, hdx_key='aaaa', user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == my_test_key
    UserAgent.clear_global()
    testresult.actual_result = None
    my_test_hdxsite = 'test'
    facade(my_testfn, hdx_site=my_test_hdxsite, user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == 'https://%s-data.humdata.org/' % my_test_hdxsite
    UserAgent.clear_global()
    testresult.actual_result = None
    # HDX_SITE env var overrides the explicit hdx_site argument.
    monkeypatch.setenv('HDX_SITE', my_test_hdxsite)
    facade(my_testfn, hdx_site='feature', user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == 'https://%s-data.humdata.org/' % my_test_hdxsite
    UserAgent.clear_global()
    # HDX_URL env var overrides both hdx_site argument and HDX_SITE env var.
    my_test_hdxurl = 'http://other-data.humdata.org'
    monkeypatch.setenv('HDX_URL', my_test_hdxurl)
    facade(my_testfn, hdx_site='feature', user_agent=my_user_agent, **cfg)
    assert testresult.actual_result == my_test_hdxurl
    UserAgent.clear_global()
results[len(results) - 1].extend(rows) dataset, resources, showcase = generate_dataset_resources_and_showcase( pop_types, today) folder = gettempdir() file_to_upload = None for i, _ in enumerate(results): resource = resources[i] file_to_upload = join(folder, resource['name']) write_list_to_csv(results[i], file_to_upload, headers=headers[i]) resource.set_file_to_upload(file_to_upload) dataset.add_update_resources(resources) dataset.update_from_yaml() # dataset.create_in_hdx() for resource in dataset.get_resources(): name = resource['name'].lower() if 'figures' in name and 'disagg' not in name: logger.info('Updating key figures datastore for %s' % name) # resource.update_datastore_for_topline(path=file_to_upload) # showcase.create_in_hdx() # showcase.add_dataset(dataset) if __name__ == '__main__': facade(main, hdx_site='demo', user_agent_config_yaml=join(expanduser('~'), '.useragents.yml'), user_agent_lookup='hdx-scraper-chathamhouse', project_config_yaml=join('config', 'project_configuration.yml'))
configuration = Configuration.read() countries_path = join('config', configuration['countries_filename']) indicators_url = configuration['indicators_url'] mvam_url = configuration['mvam_url'] showcase_url = configuration['showcase_url'] countries = get_countries(countries_path, downloader) variables = get_mvamvariables(indicators_url, downloader) logger.info('Number of datasets to upload: %d' % len(countries)) for info, country in progress_storing_tempdir('WFPFoodSecurity', countries, 'iso3'): dataset, showcase, bites_disabled = \ generate_dataset_and_showcase(mvam_url, showcase_url, downloader, info['folder'], country, variables) if dataset: dataset.update_from_yaml() dataset.generate_resource_view(bites_disabled=bites_disabled) dataset.create_in_hdx( remove_additional_resources=True, hxl_update=False, updated_by_script='HDX Scraper: WFP Food Security', batch=info['batch']) showcase.create_in_hdx() showcase.add_dataset(dataset) if __name__ == '__main__': facade(main, user_agent_config_yaml=join(expanduser('~'), '.useragents.yml'), user_agent_lookup=lookup, project_config_yaml=join('config', 'project_configuration.yml'))
def test_exception(self, hdx_key_file, project_config_yaml):
    """The ValueError raised inside my_excfn must escape through facade."""
    testresult.actual_result = None
    with pytest.raises(ValueError):
        facade(my_excfn,
               hdx_key_file=hdx_key_file,
               project_config_yaml=project_config_yaml)
sheet.update_values('A1', rows) logger.info('Longest activities: %d' % largest_activities) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Data Freshness Emailer') parser.add_argument('-hk', '--hdx_key', default=None, help='HDX api key') parser.add_argument('-ua', '--user_agent', default=None, help='user agent') parser.add_argument('-pp', '--preprefix', default=None, help='preprefix') parser.add_argument('-hs', '--hdx_site', default=None, help='HDX site to use') parser.add_argument('-gs', '--gsheet_auth', default=None, help='Credentials for accessing Google Sheets') args = parser.parse_args() hdx_key = args.hdx_key if hdx_key is None: hdx_key = getenv('HDX_KEY') user_agent = args.user_agent if user_agent is None: user_agent = getenv('USER_AGENT') if user_agent is None: user_agent = 'crisis-casestudy' preprefix = args.preprefix if preprefix is None: preprefix = getenv('PREPREFIX') hdx_site = args.hdx_site if hdx_site is None: hdx_site = getenv('HDX_SITE', 'prod') gsheet_auth = args.gsheet_auth if gsheet_auth is None: gsheet_auth = getenv('GSHEET_AUTH') facade(main, hdx_key=hdx_key, user_agent=user_agent, preprefix=preprefix, hdx_site=hdx_site, gsheet_auth=gsheet_auth, project_config_yaml=join('config', 'project_configuration.yml'))
def test_facade(self, hdx_key_file, project_config_yaml):
    """facade runs my_testfn, which records the configured HDX site URL."""
    testresult.actual_result = None
    facade(my_testfn,
           hdx_key_file=hdx_key_file,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'https://test-data.humdata.org/'
# 'United arab emirates': 'AE', # 'United kingdom': 'GB', pas bon # 'United states': 'US', pas bon 'Uruguay': 'UY', 'Uzbekistan': 'UZ', 'Vanuatu': 'VU', 'Venezuela': 'VE', 'Viet nam': 'VN', 'Yemen': 'YE', 'Zambia': 'ZM', 'Zimbabwe': 'ZW' } for pays in countries: # dataset, showcase = generate_dataset_and_showcase(pays, countries[pays]) # dataset.update_from_yaml() # dataset.create_in_hdx(hxl_update=False) # # dataset.create_in_hdx() # dataset.add_tag('INDICATORS') # showcase.create_in_hdx() # showcase.add_dataset(dataset) updateTag(countries[pays]) if __name__ == '__main__': facade(main, hdx_site='prod', user_agent='HDXINTERNAL unhabitat scraper', project_config_yaml=join('config', 'project_configuration.yml'))
return resource_id, dataset_id, url, None, resource_issues['info'], resource_issues['warning'], \ resource_issues['error'], resource_issues['total'], resource_issues['toobig'], \ resource_issues['other'] except Exception as exc: try: code = exc.code except AttributeError: code = '' err = 'Exception during hashing: code=%s message=%s raised=%s.%s url=%s' % ( code, exc, exc.__class__.__module__, exc.__class__.__qualname__, url) raise type(exc)(err).with_traceback(sys.exc_info()[2]) try: return await retry.send_http( session, 'get', url, retries=1, interval=1, backoff=4, http_status_codes_to_retry=[429, 500, 502, 503, 504], fn=fn) except Exception as e: return resource_id, dataset_id, url, str( e), False, False, False, False, False, True if __name__ == '__main__': facade(main, user_agent='test', preprefix='HDXINTERNAL', hdx_site='prod')
def test_facade(self, hdx_key_file, project_config_yaml):
    """Running my_testfn through facade must record the default test site."""
    testresult.actual_result = None
    facade(
        my_testfn,
        hdx_key_file=hdx_key_file,
        project_config_yaml=project_config_yaml,
    )
    assert testresult.actual_result == 'https://test-data.humdata.org/'
results = loop.run_until_complete(future) logger.info('Execution time: %s seconds' % (time.time() - start_time)) print_results(results) def run_grequests(last_modified_check): start_time = time.time() results = grequests_check_resources_for_last_modified(last_modified_check) logger.info('Execution time: %s seconds' % (time.time() - start_time)) print_results(results) def main(configuration): resources = Resource.search_in_hdx(configuration, 'name:') last_modified_check = list() for resource in resources: resource_id = resource['id'] url = resource['url'] if 'data.humdata.org' in url or 'manage.hdx.rwlabs.org' in url or 'proxy.hxlstandard.org' in url or \ 'scraperwiki.com' in url or 'ourairports.com' in url: continue last_modified_check.append((url, resource_id)) last_modified_check = sorted( last_modified_check)[:NUMBER_OF_URLS_TO_PROCESS] # run_grequests(last_modified_check) run_aiohttp(last_modified_check) if __name__ == '__main__': facade(main, hdx_site='prod', hdx_read_only=True)
def test_exception(self, hdx_key_file, project_config_yaml):
    """facade must propagate the ValueError raised by my_excfn."""
    testresult.actual_result = None
    with pytest.raises(ValueError):
        facade(
            my_excfn,
            hdx_key_file=hdx_key_file,
            project_config_yaml=project_config_yaml,
        )
"""Placeholder OCHA-Philippines scraper entry point (no scraping yet)."""
import logging

from hdx.hdx_configuration import Configuration
from os.path import join

from hdx.facades.simple import facade

logger = logging.getLogger(__name__)


def main():
    """Placeholder body: report that nothing is implemented yet."""
    # Fixed typo in the user-visible message ('nothing do to yet').
    print('nothing to do yet')


if __name__ == '__main__':
    facade(main, hdx_site='demo',
           user_agent='HDXINTERNAL OCHA-Philippines scraper')
def test_facade(self, monkeypatch, hdx_config_yaml, project_config_yaml):
    """Verify facade configuration precedence between keyword arguments and
    the USER_AGENT / PREPREFIX / HDX_KEY / HDX_SITE / HDX_URL environment
    variables, including trailing-slash normalisation of HDX_URL."""
    UserAgent.clear_global()
    my_user_agent = 'test'
    testresult.actual_result = None
    facade(my_testfn, user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'https://data.humdata.org'
    UserAgent.clear_global()
    version = get_api_version()
    testresult.actual_result = None
    facade(my_testuafn, user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
        version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_user_agent = 'lala'
    monkeypatch.setenv('USER_AGENT', my_user_agent)
    facade(my_testuafn,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
        version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    # USER_AGENT env var overrides the explicit user_agent argument.
    facade(my_testuafn, user_agent='test',
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'HDXPythonLibrary/%s-%s' % (
        version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_preprefix = 'haha'
    monkeypatch.setenv('PREPREFIX', my_preprefix)
    facade(my_testuafn, user_agent='test',
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == '%s:HDXPythonLibrary/%s-%s' % (
        my_preprefix, version, my_user_agent)
    UserAgent.clear_global()
    testresult.actual_result = None
    my_test_key = '1234'
    facade(my_testkeyfn, hdx_key=my_test_key, user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == my_test_key
    UserAgent.clear_global()
    testresult.actual_result = None
    # HDX_KEY env var overrides the explicit hdx_key argument.
    monkeypatch.setenv('HDX_KEY', my_test_key)
    facade(my_testkeyfn, hdx_key='aaaa', user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == my_test_key
    UserAgent.clear_global()
    testresult.actual_result = None
    my_test_hdxsite = 'test'
    facade(my_testfn, hdx_site=my_test_hdxsite, user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'https://%s-data.humdata.org' % my_test_hdxsite
    UserAgent.clear_global()
    testresult.actual_result = None
    # HDX_SITE env var overrides the explicit hdx_site argument.
    monkeypatch.setenv('HDX_SITE', my_test_hdxsite)
    facade(my_testfn, hdx_site='feature', user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == 'https://%s-data.humdata.org' % my_test_hdxsite
    UserAgent.clear_global()
    # HDX_URL env var overrides hdx_site entirely.
    my_test_hdxurl = 'http://other-data.humdata.org'
    monkeypatch.setenv('HDX_URL', my_test_hdxurl)
    facade(my_testfn, hdx_site='feature', user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == my_test_hdxurl
    UserAgent.clear_global()
    # A trailing slash on HDX_URL is stripped before use.
    my_test_hdxurl2 = 'http://other-data.humdata.org/'
    monkeypatch.setenv('HDX_URL', my_test_hdxurl2)
    facade(my_testfn, hdx_site='feature', user_agent=my_user_agent,
           hdx_config_yaml=hdx_config_yaml,
           project_config_yaml=project_config_yaml)
    assert testresult.actual_result == my_test_hdxurl
    UserAgent.clear_global()
from eca import generateDatasets
import logging
from hdx.hdx_configuration import Configuration
from os.path import join
from hdx.facades.simple import facade

logger = logging.getLogger(__name__)


def main():
    """Generate the UNECA datasets and create each one in HDX."""
    for dataset in generateDatasets(32):
        dataset.update_from_yaml()
        # dataset.check_required_fields(ignore_fields=['notes'])
        dataset.create_in_hdx()


if __name__ == '__main__':
    facade(main, hdx_site='test', user_agent='HDXINTERNAL UNECA scraper')