def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure
    """
    try:
        # Build the HDX configuration and announce which site is targeted
        site_configuration = Configuration(**kwargs)
        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % site_configuration.get_hdx_site())
        projectmainfn(site_configuration)
    except Exception as e:
        # Log the full traceback and tell ScraperWiki the run failed
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> bool:
    """Facade that handles ScraperWiki and calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        bool: True = success, False = failure
    """
    try:
        # Create configuration from the supplied keyword arguments
        hdx_configuration = Configuration(**kwargs)
        logger.info('--------------------------------------------------')
        logger.info('> HDX Site: %s' % hdx_configuration.get_hdx_site_url())
        projectmainfn(hdx_configuration)
    except Exception as e:
        # Any failure in setup or the project's main function ends the run
        logger.critical(e, exc_info=True)
        scraperwiki.status('error', 'Run failed: %s' % sys.exc_info()[0])
        return False
    logger.info('Run completed successfully.\n')
    scraperwiki.status('ok')
    return True
def _write_to_hdx(self, action, data, id_field_name, file_to_upload=None):
    # type: (str, dict, str, Optional[str]) -> dict
    """Creates or updates an HDX object in HDX and return HDX object metadata dict

    Args:
        action (str): Action to perform eg. 'create', 'update'
        data (dict): Data to write to HDX
        id_field_name (str): Name of field containing HDX object identifier or None
        file_to_upload (Optional[str]): File to upload to HDX

    Returns:
        dict: HDX object metadata

    Raises:
        HDXError: If the CKAN call fails for any reason
    """
    def post(files):
        # POST to CKAN using this configuration's credentials
        return Configuration.remoteckan().call_action(
            self.actions()[action], data, files=files,
            requests_kwargs={'auth': Configuration.read()._get_credentials()})

    try:
        if file_to_upload:
            # with-statement guarantees the upload file is closed, replacing
            # the previous manual open()/close()-in-finally bookkeeping
            with open(file_to_upload, 'rb') as file:
                return post([('upload', file)])
        return post(None)
    except Exception as e:
        six.raise_from(HDXError('Failed when trying to %s %s! (POST)' % (action, self.data[id_field_name])), e)
def _read_from_hdx(self, object_type, value, fieldname='id', action=None, **kwargs):
    # type: (str, str, str, Optional[str], Any) -> Tuple[bool, Union[dict, str]]
    """Makes a read call to HDX passing in given parameter.

    Args:
        object_type (str): Description of HDX object type (for messages)
        value (str): Value of HDX field
        fieldname (str): HDX field name. Defaults to id.
        action (Optional[str]): Replacement CKAN action url to use. Defaults to None.
        **kwargs: Other fields to pass to CKAN.

    Returns:
        Tuple[bool, Union[dict, str]]: (True/False, HDX object metadata/Error)
    """
    if not fieldname:
        raise HDXError("Empty %s field name!" % object_type)
    if action is None:
        action = self.actions()['show']
    # kwargs take precedence over the fieldname entry, as before
    request_data = dict({fieldname: value}, **kwargs)
    try:
        result = Configuration.remoteckan().call_action(
            action, request_data,
            requests_kwargs={'auth': Configuration.read()._get_credentials()})
        return True, result
    except NotFound:
        # Missing objects are reported as a (False, message) pair, not an error
        return False, "%s=%s: not found!" % (fieldname, value)
    except Exception as e:
        six.raise_from(HDXError('Failed when trying to read: %s=%s! (POST)' % (fieldname, value)), e)
def test_get_hdx_key_site(self, hdx_key_file, project_config_yaml):
    """API key, site url and credentials are correct for the prod site."""
    config = Configuration(hdx_site='prod', hdx_key_file=hdx_key_file,
                           hdx_config_dict={},
                           project_config_yaml=project_config_yaml)
    assert config.get_api_key() == '12345'
    assert config.get_hdx_site_url() == 'https://data.humdata.org/'
    assert config._get_credentials() == ('', '')
def test_init(self, hdx_key_file, project_config_json, project_config_yaml):
    """Every invalid combination of configuration sources must raise."""
    # Non-existent files raise FileNotFoundError
    for bad_kwargs in (
            dict(),
            dict(hdx_key_file='NOT_EXIST', project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, hdx_config_yaml='NOT_EXIST',
                 project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, hdx_config_json='NOT_EXIST',
                 project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, project_config_yaml='NOT_EXIST'),
            dict(hdx_key_file=hdx_key_file, project_config_json='NOT_EXIST')):
        with pytest.raises(FileNotFoundError):
            Configuration(**bad_kwargs)
    # Unknown site or multiple project config sources raise ConfigurationError
    for bad_kwargs in (
            dict(hdx_site='NOT_EXIST', hdx_key_file=hdx_key_file,
                 project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, project_config_json=project_config_json,
                 project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, project_config_dict={'la': 'la'},
                 project_config_yaml=project_config_yaml),
            dict(hdx_key_file=hdx_key_file, project_config_dict={'la': 'la'},
                 project_config_json=project_config_json)):
        with pytest.raises(ConfigurationError):
            Configuration(**bad_kwargs)
def __init__(self, configuration: Configuration, initial_data: dict):
    """Initialise an HDX object and open a CKAN connection to the HDX site.

    Args:
        configuration (Configuration): HDX configuration
        initial_data (dict): initial metadata for the object
    """
    super(HDXObject, self).__init__(initial_data)
    self.configuration = configuration
    self.old_data = None
    # Read the library version for the user agent string; the with-statement
    # fixes the previous version's leaked file handle (open without close)
    with open(script_dir_plus_file(join('..', 'version.txt'), HDXObject)) as version_file:
        version = version_file.read().strip()
    self.hdxpostsite = ckanapi.RemoteCKAN(configuration.get_hdx_site_url(),
                                          apikey=configuration.get_api_key(),
                                          user_agent='HDXPythonLibrary/%s' % version)
def test_project_configuration_dict(self, hdx_key_file):
    # Configuration.create with only a key file must yield the built-in HDX
    # defaults; a supplied project_config_dict is merged on top of them.
    actual_configuration = Configuration.create(hdx_key_file=hdx_key_file)
    # Expected defaults: the four known HDX sites plus required-field
    # definitions for each HDX object type.
    expected_configuration = {
        'api_key': '12345',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org/',
            'username': None,
            'password': None
        },
        'hdx_demo_site': {
            'url': 'https://demo-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'hdx_test_site': {
            'url': 'https://test-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'hdx_feature_site': {
            'url': 'https://feature-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'dataset': {'required_fields': [
            'name',
            'private',
            'title',
            'notes',
            'dataset_source',
            'owner_org',
            'dataset_date',
            'groups',
            'license_id',
            'methodology',
            'data_update_frequency'
        ]},
        'resource': {'dataset_id': 'package_id',
                     'required_fields': ['package_id', 'name', 'format', 'url',
                                         'description', 'url_type', 'resource_type'
                                         ]},
        'galleryitem': {'dataset_id': 'dataset_id',
                        'required_fields': [
                            'dataset_id',
                            'title',
                            'type',
                            'description',
                            'url',
                            'image_url',
                        ], 'ignore_dataset_id_on_update': True},
    }
    assert actual_configuration == expected_configuration
    # Project-level dict entries are added on top of the defaults
    actual_configuration = Configuration.create(hdx_key_file=hdx_key_file,
                                                project_config_dict={'abc': '123'})
    expected_configuration['abc'] = '123'
    assert actual_configuration == expected_configuration
def test_hdx_configuration_json(self, hdx_key_file, project_config_yaml):
    # An hdx_config_json file supplies the HDX-level configuration; the
    # resulting configuration combines it with the project config values.
    hdx_config_json = join('fixtures', 'config', 'hdx_config.json')
    actual_configuration = Configuration(hdx_key_file=hdx_key_file,
                                         hdx_config_json=hdx_config_json,
                                         project_config_yaml=project_config_yaml)
    expected_configuration = {
        'api_key': '12345',
        'param_1': 'ABC',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org/',
            'username': None,
            'password': None
        },
        'hdx_test_site': {
            'url': 'https://test-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'dataset': {'required_fields': [
            'name',
            'dataset_date',
        ]},
        'resource': {'dataset_id': 'package_id',
                     'required_fields': ['name', 'description'
                                         ]},
        'galleryitem': {'dataset_id': 'dataset_id',
                        'required_fields': [
                            'dataset_id',
                        ], },
    }
    assert actual_configuration == expected_configuration
def get_HDX_code_from_location(location, configuration=None):
    # type: (str, Optional[Configuration]) -> Tuple[Optional[str], bool]
    """Get HDX code for location

    Args:
        location (str): Location for which to get HDX code
        configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.

    Returns:
        Tuple[Optional[str], bool]: HDX code and if the match is strong or (None, False) for no match
    """
    if configuration is None:
        configuration = Configuration.read()
    locationlower = location.lower()
    # Fetch the valid locations once instead of once per matching pass
    validlocations = configuration.validlocations()
    # Pass 1: exact (case-insensitive) match on HDX location code
    for locdict in validlocations:
        locationcode = locdict['name']
        if locationlower == locationcode.lower():
            return locationcode, True
    # Pass 2: exact match on location title
    for locdict in validlocations:
        if locationlower == locdict['title'].lower():
            return locdict['name'], True
    # Pass 3: weak match - one string contains the other
    for locdict in validlocations:
        locationname = locdict['title'].lower()
        if locationlower in locationname or locationname in locationlower:
            return locdict['name'], False
    return None, False
def test_hdx_configuration_yaml(self, hdx_key_file, project_config_yaml):
    # An hdx_config_yaml file supplies the HDX-level configuration; the
    # resulting configuration combines it with the project config values.
    hdx_configuration_yaml = join('tests', 'fixtures', 'config', 'hdx_config.yml')
    actual_configuration = Configuration.create(hdx_key_file=hdx_key_file,
                                                hdx_config_yaml=hdx_configuration_yaml,
                                                project_config_yaml=project_config_yaml)
    expected_configuration = {
        'api_key': '12345',
        'param_1': 'ABC',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org/',
            'username': None,
            'password': None
        },
        'hdx_test_site': {
            'url': 'https://test-data.humdata.org/',
            'username': '******',
            'password': '******'
        },
        'dataset': {'required_fields': [
            'name',
            'title',
            'dataset_date',
        ]},
        'resource': {'dataset_id': 'package_id',
                     'required_fields': ['package_id', 'name', 'description'
                                         ]},
        'galleryitem': {'dataset_id': 'dataset_id',
                        'required_fields': [
                            'dataset_id',
                            'title',
                        ], 'ignore_on_update': ['dataset_id']},
    }
    assert actual_configuration == expected_configuration
def __init__(self, initial_data, configuration=None):
    # type: (dict, Optional[Configuration]) -> None
    """Initialise the HDX object with initial metadata and a configuration.

    Args:
        initial_data (dict): initial metadata for the object
        configuration (Optional[Configuration]): HDX configuration; falls back
            to the global configuration when None
    """
    super(HDXObject, self).__init__(initial_data)
    self.old_data = None
    # Use the globally registered configuration unless one was supplied
    self.configuration = Configuration.read() if configuration is None else configuration
def main():
    # Entry point: set up configuration and read a dataset from HDX.
    conf = Configuration()
    # Country name -> ISO3 code. Only Benin and Ghana are active; the rest
    # are kept commented out as candidates for future runs.
    # NOTE(review): conf and countries are not used in the visible code
    # below - confirm whether they are consumed further down in the file.
    countries = {
        'Benin': "BEN",
        # 'Burkina Faso': "BFA",
        # 'Burundi': "BDI",
        # 'Congo': "COG",
        # 'Ivory Coast': "CIV",
        'Ghana': "GHA"
        # 'Guinea': "GIN",
        # 'Guinea-bissau': "GNB",
        # 'Gambia': "GMB",
        # 'Liberia': "LBR",
        # 'Mali': "MLI",
        # 'Mauritania': "MRT",
        # 'Malawi':"MWI",
        # 'Marocco': "MAR",
        # 'Niger': "NER",
        # 'Nigeria': "NGA",
        # 'Senegal':"SEN",
        # 'Sierra Leone': "SLE",
        # 'Togo': "TGO",
        # 'Cameroon': "CMR",
        # 'Central African Republic':"CAR",
        # 'Tanzania':"TZA",
        # 'Rwanda': "RWA",
        # 'Somalia': "SOM",
        # 'South Sudan': "SSD",
        # 'Yemen': "YEM",
        # 'Democratic Republic of The Congo': "COD",
        # 'Uganda': "UGA",
        # 'Zambia': "ZMB",
        # 'Angola': "AGO",
        # 'Kenya': "KEN",
        # 'Ethiopia': "ETH"
        # 'Algeria': "DZA",
        # 'Egypt': "EGY",
        # 'Tunisia':"TUN"
        # 'Haiti': "HTI",
        # 'Libya': "LBY",
        # 'Sudan': "SDN",
        # 'Bangladesh': "BGD",
        # 'Djibouti': "DJI",
        # 'Gabon': "GAB",
        # 'Madagascar': "MDG",
        # 'Lesotho': "LSO",
        # 'Namibia': "NAM",
        # 'Zimbabwe': "ZWE",
        # 'Mozambique': "MOZ",
        # 'Botswana': "BWA"
        # 'Palestine': "PSE",
        # 'Mauritius' : "MUS",
        # 'Zambia' : "ZMB"
    }
    dataset = Dataset.read_from_hdx('zimbabwe-healthsites')
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> None:
    """Facade to simplify project setup that calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        None
    """
    # Build configuration, log the target site, then hand off to the project
    site_configuration = Configuration(**kwargs)
    logger.info('--------------------------------------------------')
    logger.info('> HDX Site: %s' % site_configuration.get_hdx_site())
    projectmainfn(site_configuration)
def facade(projectmainfn: Callable[[Configuration], None], **kwargs) -> None:
    """Facade to simplify project setup that calls project main function

    Args:
        projectmainfn ((configuration) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        None
    """
    # Build configuration, log the target site url, then run the project
    hdx_configuration = Configuration(**kwargs)
    logger.info('--------------------------------------------------')
    logger.info('> HDX Site: %s' % hdx_configuration.get_hdx_site_url())
    projectmainfn(hdx_configuration)
def _check_required_fields(self, object_type, ignore_fields):
    # type: (str, List[str]) -> None
    """Helper method to check that metadata for HDX object is complete

    Args:
        object_type (str): Type of HDX object (eg. 'dataset') used to look up
            its required fields in the configuration (was undocumented)
        ignore_fields (List[str]): Any fields to ignore in the check

    Returns:
        None

    Raises:
        HDXError: If a required field is missing from the object's metadata
    """
    # '%s' % object_type was redundant - object_type is already a str key
    for field in Configuration.read()[object_type]['required_fields']:
        if field not in self.data and field not in ignore_fields:
            raise HDXError("Field %s is missing in %s!" % (field, object_type))
def check_required_fields(self, ignore_dataset_id=False) -> None:
    """Check that metadata for dataset and its resources and gallery is complete. (ignore_dataset_id is not used.)

    Returns:
        None
    """
    # Raise on the first dataset field that is required but absent
    required = Configuration.read()['dataset']['required_fields']
    missing = [field for field in required if field not in self.data]
    if missing:
        raise HDXError("Field %s is missing in dataset!" % missing[0])
    # Children are validated with dataset id ignored since the dataset
    # supplies it when they are created
    for resource in self.resources:
        resource.check_required_fields(ignore_dataset_id=True)
    for galleryitem in self.gallery:
        galleryitem.check_required_fields(ignore_dataset_id=True)
def create_in_hdx(self):
    # type: () -> None
    """Check if dataset exists in HDX and if so, update it, otherwise create it

    Returns:
        None
    """
    self.check_required_fields()
    loadedid = None
    # Prefer loading the existing dataset by id; fall back to name
    if 'id' in self.data:
        if self._dataset_load_from_hdx(self.data['id']):
            loadedid = self.data['id']
        else:
            logger.warning('Failed to load dataset with id %s' % self.data['id'])
    if not loadedid:
        if self._dataset_load_from_hdx(self.data['name']):
            loadedid = self.data['name']
    if loadedid:
        # Dataset already exists in HDX: merge and update instead of creating
        logger.warning('Dataset exists. Updating %s' % loadedid)
        self._dataset_merge_hdx_update(True, True)
        return
    # Creating anew: validate resources and remember those that need a
    # file uploaded to the filestore after creation
    filestore_resources = list()
    if self.resources:
        for resource in self.resources:
            resource.check_required_fields(ignore_dataset_id=True)
            if resource.get_file_to_upload():
                filestore_resources.append(resource)
        self.data['resources'] = self._convert_hdxobjects(self.resources)
    self._save_to_hdx('create', 'name')
    # Upload files for filestore resources, keeping the local resource data
    # and the server-created resource metadata in sync (matched by name)
    for resource in filestore_resources:
        for created_resource in self.data['resources']:
            if resource['name'] == created_resource['name']:
                merge_two_dictionaries(resource.data, created_resource)
                resource.update_in_hdx()
                merge_two_dictionaries(created_resource, resource.data)
                break
    self.init_resources()
    self.separate_resources()
    # Gallery items are created separately, linked to this dataset by id
    if self.include_gallery:
        self.old_data['gallery'] = self._copy_hdxobjects(self.gallery, GalleryItem)
        galleryitem_dataset_id = Configuration.read()['galleryitem']['dataset_id']
        for i, galleryitem in enumerate(self.gallery):
            galleryitem[galleryitem_dataset_id] = self.data['id']
            galleryitem.check_required_fields()
            galleryitem.create_in_hdx()
def get_location_from_HDX_code(code, configuration=None):
    # type: (str, Optional[Configuration]) -> Optional[str]
    """Get location from HDX location code

    Args:
        code (str): code for which to get location name
        configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.

    Returns:
        Optional[str]: location name or None if the code does not match
    """
    if configuration is None:
        configuration = Configuration.read()
    # Hoisted: code.lower() was recomputed on every loop iteration
    codelower = code.lower()
    for locdict in configuration.validlocations():
        if codelower == locdict['name'].lower():
            return locdict['title']
    # Explicit about the no-match case (was an implicit None fall-through)
    return None
def _merge_hdx_update(self, object_type, id_field_name, file_to_upload=None):
    # type: (str, str, Optional[str]) -> None
    """Helper method to check if HDX object exists and update it

    Args:
        object_type (str): Description of HDX object type (for messages)
        id_field_name (str): Name of field containing HDX object identifier
        file_to_upload (Optional[str]): File to upload to HDX

    Returns:
        None
    """
    # Overlay the new data on what was previously read from HDX
    merge_two_dictionaries(self.data, self.old_data)
    # '%s' % object_type was redundant - use the str key directly
    ignore_dataset_id = Configuration.read()[object_type].get('ignore_dataset_id_on_update', False)
    self.check_required_fields(ignore_dataset_id=ignore_dataset_id)
    self._save_to_hdx('update', id_field_name, file_to_upload)
def check_required_fields(self, ignore_dataset_id=False):
    # type: (Optional[bool]) -> None
    """Check that metadata for gallery item is complete. The parameter ignore_dataset_id
    should be set to True if you intend to add the object to a Dataset object (where it
    will be created during dataset creation).

    Args:
        ignore_dataset_id (bool): Whether to ignore the dataset id. Default is False.

    Returns:
        None
    """
    # The dataset id field is exempted when the item will be attached to a
    # dataset that supplies it at creation time
    if ignore_dataset_id:
        ignore_fields = [Configuration.read()['galleryitem']['dataset_id']]
    else:
        ignore_fields = []
    self._check_required_fields('galleryitem', ignore_fields)
def get_hdx_contents(country, from_datetime, to_datetime):
    """Append an HTML summary of the newest HDX dataset for country to hdx_contents.txt.

    Args:
        country (str): country search term
        from_datetime: NOTE(review): never read in this function - confirm callers
        to_datetime: NOTE(review): never read in this function - confirm callers

    Returns:
        int: number of dataset summaries written
    """
    num_articles = 0
    # The unused 'config' local was dropped; create() registers the configuration
    Configuration.create(hdx_site='prod', hdx_read_only=True)
    # Only the single most recently modified matching dataset is fetched (rows=1)
    datasets = Dataset.search_in_hdx(country, sort="metadata_modified desc", rows=1)
    # with-statement guarantees the file is closed even if a write fails
    # (the previous version leaked the handle on any exception)
    with open("hdx_contents.txt", "a") as outfile:
        for dataset in datasets:
            outfile.write("<span style='font-size:16px'>Relevant New Dataset relating to " + country + " on HDX</span><br><br>")
            if dataset["url"]:
                # fixed malformed anchor: was "''>" which emitted a stray quote
                outfile.write("<a href='" + str(dataset["url"]).encode("utf-8") + "'> <span style='font-size:16px'>" + str(dataset["title"]).encode("utf-8") + "</span> </a> <br>")
            else:
                outfile.write("<span style='font-size:16px'>" + str(dataset["title"]).encode("utf-8") + ": No Link Available.</span> <br>")
            outfile.write("<span style='font-size:14px'>" + dataset["notes"].encode("utf-8") + "</span>")
            num_articles += 1
            print("writing hdx")
    return num_articles
def facade(projectmainfn, **kwargs):
    # (Callable[[None], None], Any) -> None
    """Facade to simplify project setup that calls project main function

    Args:
        projectmainfn ((None) -> None): main function of project
        **kwargs: configuration parameters to pass to HDX Configuration class

    Returns:
        None
    """
    # Create the global configuration, log the target site, run the project
    hdx_site_url = Configuration._create(**kwargs)
    logger.info('--------------------------------------------------')
    logger.info('> HDX Site: %s' % hdx_site_url)
    projectmainfn()
def test_hdx_configuration_dict(self, hdx_key_file, project_config_yaml):
    # An hdx_config_dict supplies the HDX-level configuration directly; the
    # result combines it with the project config values (eg. param_1).
    actual_configuration = Configuration(hdx_site='prod', hdx_key_file=hdx_key_file,
                                         hdx_config_dict={
                                             'hdx_prod_site': {
                                                 'url': 'https://data.humdata.org/',
                                                 'username': None,
                                                 'password': None
                                             },
                                             'XYZ': {'567': 987}
                                         },
                                         project_config_yaml=project_config_yaml)
    expected_configuration = {
        'api_key': '12345',
        'param_1': 'ABC',
        'hdx_prod_site': {
            'url': 'https://data.humdata.org/',
            'username': None,
            'password': None
        },
        'XYZ': {'567': 987}
    }
    assert actual_configuration == expected_configuration
def check_required_fields(self, ignore_dataset_id=False):
    # type: (Optional[bool]) -> None
    """Check that metadata for resource is complete and add resource_type and
    url_type if not supplied. The parameter ignore_dataset_id should be set to
    True if you intend to add the object to a Dataset object (where it will be
    created during dataset creation).

    Args:
        ignore_dataset_id (bool): Whether to ignore the dataset id. Default is False.

    Returns:
        None
    """
    if self.file_to_upload is None:
        # Link resource: a url is mandatory; both type fields default to 'api'
        if 'url' not in self.data:
            raise HDXError('Either a url or a file to upload must be supplied!')
        self.data.setdefault('resource_type', 'api')
        self.data.setdefault('url_type', 'api')
    else:
        # File upload: placeholder url required (HDX overwrites it on upload)
        self.data.setdefault('url', 'ignore')  # must be set even though overwritten
        self.data.setdefault('resource_type', 'file.upload')
        self.data.setdefault('url_type', 'upload')
        self.data.pop('tracking_summary', None)
    if ignore_dataset_id:
        ignore_fields = [Configuration.read()['resource']['dataset_id']]
    else:
        ignore_fields = []
    self._check_required_fields('resource', ignore_fields)
def test_get_hdx_key_site(self, hdx_key_file, project_config_yaml):
    """API key and site url are read correctly for the uat site."""
    config = Configuration(hdx_site='uat', hdx_key_file=hdx_key_file,
                           hdx_config_dict={},
                           project_config_yaml=project_config_yaml)
    assert config.get_api_key() == '12345'
    assert config.get_hdx_site() == 'https://uat-data.humdata.org/'
def my_excfn():
    # Record the globally configured HDX site url, then raise deliberately
    # so the test can verify the facade's exception handling path.
    testresult.actual_result = Configuration.read().get_hdx_site_url()
    raise ValueError('Some failure!')
def my_testfn():
    # Record the globally configured HDX site url so the test can check
    # that the facade set up the configuration before calling main.
    testresult.actual_result = Configuration.read().get_hdx_site_url()
def configuration(self):
    """Pytest fixture: build a Configuration from the fixture key file and
    project configuration yaml."""
    return Configuration(
        hdx_key_file=join('fixtures', '.hdxkey'),
        project_config_yaml=join('fixtures', 'config', 'project_configuration.yml'))
def my_excfn(configuration: Configuration):
    # Record the configured HDX site, then raise deliberately so the test
    # can verify the facade's exception handling path.
    testresult.actual_result = configuration.get_hdx_site()
    raise ValueError('Some failure!')
def my_testfn(configuration: Configuration):
    # Record the configured HDX site so the test can check that the facade
    # passed a valid configuration into the project main function.
    testresult.actual_result = configuration.get_hdx_site()
def __init__(self, configuration: Configuration, initial_data: dict):
    """Initialise the HDX object and open a CKAN connection to the configured site.

    Args:
        configuration (Configuration): HDX configuration
        initial_data (dict): initial metadata for the object
    """
    super(HDXObject, self).__init__(initial_data)
    self.old_data = None
    self.configuration = configuration
    # CKAN endpoint used for POSTs to the configured HDX site
    self.hdxpostsite = ckanapi.RemoteCKAN(
        configuration.get_hdx_site(), apikey=configuration.get_api_key())
def main():
    '''Generate dataset and create it in HDX'''
    # Build configuration, generate the dataset from it, then push to HDX
    configuration = Configuration()
    dataset = generate_dataset(configuration)
    dataset.update_from_yaml()
    dataset.create_in_hdx()
import os
import sys
#import pandas as pd
from hdx.configuration import Configuration
from hdx.data.dataset import Dataset

# Redirect stdout to a log file for the duration of the run. The try/finally
# guarantees stdout is restored and the log file closed even if an exception
# occurs (the previous version leaked the handle and left stdout redirected
# on any error).
old_stdout = sys.stdout
log_file = open("C:/Users/JannisV/Rode Kruis/CP data/FME/Admin_data/message_dataset.log", "w")
sys.stdout = log_file
try:
    Configuration.create(hdx_site='prod', hdx_read_only=True)

    tag = 'administrative boundaries'
    datatype = 'zipped shapefile'
    downloadpath = 'C:/Users/JannisV/Rode Kruis/CP data/FME/Admin_data/zipfiles/api/'

    # Search HDX for datasets tagged admin*, then report the zipped
    # shapefile resources of those carrying the target tag
    datasets = Dataset.search_in_hdx('', fq='tags:admin*', rows=10)
    #print(datasets)
    for dataset in datasets:
        if tag in dataset.get_tags():
            resources = dataset.get_resources()
            for resource in resources:
                if resource['format'] == datatype:
                    print(resources)
                    #folder = downloadpath+dataset['name']
                    #if not os.path.exists(folder):
                    #    os.makedirs(folder)
                    #url, path = resource.download(folder)
finally:
    sys.stdout = old_stdout
    log_file.close()
types: - lines select: - name - highway where: highway IS NOT NULL """ if __name__ == '__main__': import json import pprint from hdx.configuration import Configuration import requests Configuration.create( hdx_site=os.getenv('HDX_SITE', 'demo'), hdx_key=os.getenv('HDX_API_KEY'), ) logging.basicConfig() f_s = FeatureSelection(F_S) extent = open('hdx_exports/adm0/GIN_adm0.geojson').read() h = HDXExportSet(dataset_prefix='demodata_test', name='Geopreview Test', extent=extent, feature_selection=f_s, locations=['GIN']) h.sync_resources([ Artifact(['hotosm_roads_gpkg.zip'], 'geopackage', theme='roads'), Artifact(['hotosm_roads_lines_shp.zip'], 'shp', theme='roads') ], 'http://exports-staging.hotosm.org/downloads/4fa2e396-a6bf-4476-829b-c88b953af42c' )
def configuration(self):
    # Pytest fixture that registers the test HDX configuration globally.
    # NOTE(review): unlike the similar fixture elsewhere in this codebase,
    # this does not return the configuration - presumably Configuration.create()
    # stores a global configuration that tests then access via
    # Configuration.read(); confirm no caller relies on the fixture's value.
    hdx_key_file = join('tests', 'fixtures', '.hdxkey')
    project_config_yaml = join('tests', 'fixtures', 'config', 'project_configuration.yml')
    Configuration.create(hdx_key_file=hdx_key_file, project_config_yaml=project_config_yaml)