def test_datastore(self, configuration, post_datastore, topline_yaml, topline_json):
    """Exercise the datastore create/update/delete calls of a resource.

    After every call that is expected to succeed, the test checks the marker
    held in ``TestResource.datastore`` ('create' or 'delete') and resets it to
    None so that the next call is verified independently.

    NOTE(review): the arguments are presumably pytest fixtures defined
    elsewhere, and ``TestResource.datastore`` is presumably written by the
    mock installed by ``post_datastore`` - confirm against the fixtures.
    """
    resource = Resource.read_from_hdx('TEST1')
    resource2 = Resource.read_from_hdx('TEST5')

    # delete_first values 0, 1 and 2 are accepted ...
    for delete_first in (0, 1, 2):
        TestResource.datastore = None
        resource.create_datastore(delete_first=delete_first)
        assert TestResource.datastore == 'create'
    TestResource.datastore = None
    # ... while 3 is rejected.
    with pytest.raises(HDXError):
        resource.create_datastore(delete_first=3)

    resource.update_datastore()
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_for_topline()
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_from_dict_schema({
        "schema": [
            {
                "id": "code",
                "type": "text"
            },
        ],
        "primary_key": "code"
    })
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_from_yaml_schema(topline_yaml)
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    # The JSON schema variant is run against a local csv and a local zip.
    for fixture_name in ('test_data.csv', 'test_data.zip'):
        filefordatastore = join('tests', 'fixtures', fixture_name)
        resource.update_datastore_from_json_schema(topline_json, path=filefordatastore)
        assert TestResource.datastore == 'create'
        TestResource.datastore = None

    filefordatastore = join('tests', 'fixtures', 'datastore',
                            'ACLED-All-Africa-File_20170101-to-20170708.xlsx')
    resource.update_datastore(path=filefordatastore)
    assert TestResource.datastore == 'create'

    with pytest.raises(HDXError):
        resource2.update_datastore_from_json_schema(topline_json)

    resource.delete_datastore()
    assert TestResource.datastore == 'delete'
    TestResource.datastore = None

    # Removing the url is setup, not the behaviour under test: keep it outside
    # the raises block so that only create_datastore() is asserted to raise.
    del resource['url']
    with pytest.raises(HDXError):
        resource.create_datastore()
def test_read_from_hdx(self, configuration, read):
    """Check Resource.read_from_hdx for one known and two unknown identifiers.

    'TEST1' must yield a resource with the expected id, name and package id;
    'TEST2' and 'TEST3' must yield None.

    NOTE(review): ``configuration`` and ``read`` are presumably pytest
    fixtures that set up the configuration and mock the read call.
    """
    found = Resource.read_from_hdx('TEST1')
    expected_fields = (
        ('id', 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'),
        ('name', 'MyResource1'),
        ('package_id', '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'),
    )
    for key, expected in expected_fields:
        assert found[key] == expected

    for missing_id in ('TEST2', 'TEST3'):
        missing = Resource.read_from_hdx(missing_id)
        assert missing is None
def test_read_from_hdx(self, configuration, read):
    """Check Resource.read_from_hdx when the configuration is passed first.

    'TEST1' must yield a resource with the expected id, name and package id;
    'TEST2' and 'TEST3' must yield None.

    NOTE(review): ``read`` is presumably a pytest fixture mocking the read
    call - confirm against the fixture definition.
    """
    found = Resource.read_from_hdx(configuration, 'TEST1')
    expected_fields = (
        ('id', 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'),
        ('name', 'MyResource1'),
        ('package_id', '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'),
    )
    for key, expected in expected_fields:
        assert found[key] == expected

    for missing_id in ('TEST2', 'TEST3'):
        missing = Resource.read_from_hdx(configuration, missing_id)
        assert missing is None
def test_read_from_hdx(self, configuration, read):
    """Check Resource.read_from_hdx for UUID-style and malformed identifiers.

    The identifier ending in ...811 must yield a resource with the expected
    id, name and package id; the ones ending in ...812 and ...813 must yield
    None; the identifier 'ABC' must raise HDXError.

    NOTE(review): ``configuration`` and ``read`` are presumably pytest
    fixtures that set up the configuration and mock the read call.
    """
    found = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046811')
    expected_fields = (
        ('id', 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'),
        ('name', 'MyResource1'),
        ('package_id', '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'),
    )
    for key, expected in expected_fields:
        assert found[key] == expected

    missing_ids = (
        '74b74ae1-df0c-4716-829f-4f939a046812',
        '74b74ae1-df0c-4716-829f-4f939a046813',
    )
    for missing_id in missing_ids:
        missing = Resource.read_from_hdx(missing_id)
        assert missing is None

    with pytest.raises(HDXError):
        Resource.read_from_hdx('ABC')
def test_download(self, configuration, read):
    """Check Resource.download for a good url, an empty url and a failing one.

    The downloaded file of 'TEST1' is removed straight away; only the
    returned url is asserted. An empty url must raise HDXError and
    downloading 'TEST4' must raise DownloadError.

    NOTE(review): ``read`` is presumably a pytest fixture mocking the read
    call - confirm against the fixture definition.
    """
    downloadable = Resource.read_from_hdx(configuration, 'TEST1')
    download_url, local_path = downloadable.download()
    # Clean up before asserting so the file is removed even if the check fails.
    unlink(local_path)
    expected_url = 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/test_data.csv'
    assert download_url == expected_url

    downloadable['url'] = ''
    with pytest.raises(HDXError):
        downloadable.download()

    failing = Resource.read_from_hdx(configuration, 'TEST4')
    with pytest.raises(DownloadError):
        failing.download()
def test_download(self, configuration, read):
    """Check Resource.download for a good url, an empty url and a failing one.

    For the resource ending in ...811 the returned url and the base name of
    the downloaded file ('MyResource1.csv') are asserted; the file itself is
    removed straight after the download. An empty url must raise HDXError and
    downloading the resource ending in ...814 must raise DownloadError.

    NOTE(review): ``configuration`` and ``read`` are presumably pytest
    fixtures that set up the configuration and mock the read call.
    """
    downloadable = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046811')
    failing = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046814')

    download_url, local_path = downloadable.download()
    # Clean up before asserting so the file is removed even if a check fails.
    remove(local_path)
    expected_url = 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/test_data.csv'
    assert download_url == expected_url
    assert basename(local_path) == 'MyResource1.csv'

    downloadable['url'] = ''
    with pytest.raises(HDXError):
        downloadable.download()

    with pytest.raises(DownloadError):
        failing.download()
def test_datastore(self, configuration, post_datastore, topline_yaml, topline_json, monkeypatch):
    """Run the datastore create/update calls of a resource.

    Both resources are read first, then ``monkeypatch.undo()`` is called
    before any datastore call is made. delete_first values 0-2 must be
    accepted and 3 must raise HDXError; updating 'TEST5' from the JSON schema
    must raise HDXError as well.

    NOTE(review): the arguments other than ``monkeypatch`` are presumably
    project fixtures defined elsewhere - confirm what ``undo()`` reverts here.
    """
    primary = Resource.read_from_hdx('TEST1')
    secondary = Resource.read_from_hdx('TEST5')
    monkeypatch.undo()

    for delete_first in (0, 1, 2):
        primary.create_datastore(delete_first=delete_first)
    with pytest.raises(HDXError):
        primary.create_datastore(delete_first=3)

    primary.update_datastore()
    primary.update_datastore_for_topline()
    primary.update_datastore_from_yaml_schema(topline_yaml)
    csv_path = 'fixtures/downloader/test_data.csv'
    primary.update_datastore_from_json_schema(topline_json, path=csv_path)

    with pytest.raises(HDXError):
        secondary.update_datastore_from_json_schema(topline_json)
def test_datastore(self, configuration, post_datastore, topline_yaml, topline_json):
    """Run the datastore create/update calls with the configuration-first API.

    For 'TEST1', delete_first values 0-2 must be accepted and 3 must raise
    HDXError; the update variants must all succeed. For 'TEST5', updating
    from the JSON schema must raise HDXError.

    NOTE(review): the arguments are presumably pytest fixtures defined
    elsewhere - confirm against the fixture definitions.
    """
    primary = Resource.read_from_hdx(configuration, 'TEST1')
    for delete_first in (0, 1, 2):
        primary.create_datastore(delete_first=delete_first)
    with pytest.raises(HDXError):
        primary.create_datastore(delete_first=3)

    primary.update_datastore()
    primary.update_datastore_for_topline()
    primary.update_datastore_from_yaml_schema(topline_yaml)
    primary.update_datastore_from_json_schema(topline_json)

    secondary = Resource.read_from_hdx(configuration, 'TEST5')
    with pytest.raises(HDXError):
        secondary.update_datastore_from_json_schema(topline_json)
def test_update_in_hdx(self, configuration, post_update):
    """Check update_in_hdx and create_in_hdx with the configuration-first API.

    Covers: updating a resource with an unknown id (with and without a name)
    raises HDXError; updating 'TEST1' changes its format to 'CSV'; updating
    after the id is made unknown or removed raises HDXError; creating from a
    copy of ``TestResource.resource_data`` yields id 'TEST1' and format 'xlsx'.

    NOTE(review): ``post_update`` is presumably a pytest fixture mocking the
    remote calls - confirm against the fixture definition.
    """
    # A resource that only has an unknown id (and then a name) cannot be updated.
    unknown = Resource(configuration)
    unknown['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()
    unknown['name'] = 'LALA'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()

    existing = Resource.read_from_hdx(configuration, 'TEST1')
    assert existing['id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'
    assert existing['format'] == 'XLSX'

    existing['format'] = 'CSV'
    existing['id'] = 'TEST1'
    existing['name'] = 'MyResource1'
    existing.update_in_hdx()
    assert existing['id'] == 'TEST1'
    assert existing['format'] == 'CSV'

    # Updating must fail once the id is unknown, and again once it is missing.
    existing['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    del existing['id']
    with pytest.raises(HDXError):
        existing.update_in_hdx()

    # Work on a deep copy so the shared class-level data is not modified.
    metadata = copy.deepcopy(TestResource.resource_data)
    metadata['name'] = 'MyResource1'
    metadata['id'] = 'TEST1'
    created = Resource(configuration, metadata)
    created.create_in_hdx()
    assert created['id'] == 'TEST1'
    assert created['format'] == 'xlsx'
def add_update_resource(self, resource, ignore_datasetid=False):
    # type: (Union[Resource,dict,str], Optional[bool]) -> None
    """Add a new resource to the dataset or update an existing one

    A string is treated as a resource id and read from HDX; a dictionary is
    wrapped in a Resource. The resulting Resource is merged into
    ``self.resources`` (matched on 'name') and the file to upload of the
    supplied resource is copied onto the merged object.

    Args:
        resource (Union[Resource,dict,str]): Either resource id or resource metadata from a Resource object or a dictionary
        ignore_datasetid (Optional[bool]): Whether to ignore dataset id in the resource

    Returns:
        None

    Raises:
        HDXError: If the resource already has a 'package_id' and
            ignore_datasetid is falsy, or if the argument is not a str, dict
            or Resource.
    """
    if isinstance(resource, str):
        resource = Resource.read_from_hdx(
            resource, configuration=self.configuration)
    elif isinstance(resource, dict):
        resource = Resource(resource, configuration=self.configuration)

    # Anything that is still not a Resource at this point is unsupported.
    if not isinstance(resource, Resource):
        raise HDXError('Type %s cannot be added as a resource!' %
                       type(resource).__name__)

    if 'package_id' in resource and not ignore_datasetid:
        raise HDXError(
            'Resource %s being added already has a dataset id!' % (resource['name']))

    merged = self._addupdate_hdxobject(self.resources, 'name', resource)
    upload_path = resource.get_file_to_upload()
    merged.set_file_to_upload(upload_path)
def test_update_in_hdx(self, configuration, post_update):
    """Check update_in_hdx and create_in_hdx, including file upload and state.

    Covers: updating a resource with an unknown id raises HDXError; updating
    an existing resource keeps it an 'api' resource; setting a file to upload
    turns it into an 'upload' resource with a download url; updating with an
    unknown id, no id or empty data raises HDXError; creating from a copy of
    ``TestResource.resource_data`` yields file type 'xlsx'. The 'state' field
    is asserted to be 'active' after each successful call.

    NOTE(review): ``configuration`` and ``post_update`` are presumably pytest
    fixtures mocking the remote calls - confirm against their definitions.
    """
    resource_id = '74b74ae1-df0c-4716-829f-4f939a046811'
    source_url = 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/test_data.csv'
    uploaded_url = 'http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv'

    # A resource that only has an unknown id (and then a name) cannot be updated.
    unknown = Resource()
    unknown['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()
    unknown['name'] = 'LALA'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()

    existing = Resource.read_from_hdx(resource_id)
    assert existing['id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'
    assert existing.get_file_type() == 'csv'

    existing.set_file_type('XLSX')
    existing['id'] = resource_id
    existing['name'] = 'MyResource1'
    existing.update_in_hdx()
    assert existing['id'] == resource_id
    assert existing['format'] == 'xlsx'
    assert existing.get_file_type() == 'xlsx'
    after_plain_update = (
        ('url_type', 'api'),
        ('resource_type', 'api'),
        ('url', source_url),
        ('state', 'active'),
    )
    for key, expected in after_plain_update:
        assert existing[key] == expected

    upload_path = join('tests', 'fixtures', 'test_data.csv')
    existing.set_file_to_upload(upload_path)
    existing.update_in_hdx()
    after_upload = (
        ('url_type', 'upload'),
        ('resource_type', 'file.upload'),
        ('url', uploaded_url),
        ('state', 'active'),
    )
    for key, expected in after_upload:
        assert existing[key] == expected

    # Updating must fail for an unknown id, a missing id and empty data.
    existing['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    del existing['id']
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    existing.data = {}
    with pytest.raises(HDXError):
        existing.update_in_hdx()

    # Work on a deep copy so the shared class-level data is not modified.
    metadata = copy.deepcopy(TestResource.resource_data)
    metadata['name'] = 'MyResource1'
    metadata['id'] = resource_id
    created = Resource(metadata)
    created.create_in_hdx()
    assert created['id'] == resource_id
    assert created.get_file_type() == 'xlsx'
    assert created['state'] == 'active'
def test_update_in_hdx(self, configuration, post_update):
    """Check update_in_hdx and create_in_hdx, including file upload.

    Covers: updating a resource with an unknown id raises HDXError; updating
    an existing resource keeps it an 'api' resource; setting a file to upload
    turns it into an 'upload' resource with a download url; updating with an
    unknown id, no id or empty data raises HDXError; creating from a copy of
    ``TestResource.resource_data`` yields file type 'xlsx'.

    NOTE(review): ``configuration`` and ``post_update`` are presumably pytest
    fixtures mocking the remote calls - confirm against their definitions.
    """
    resource_id = '74b74ae1-df0c-4716-829f-4f939a046811'
    source_url = 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/test_data.csv'
    uploaded_url = 'http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv'

    # A resource that only has an unknown id (and then a name) cannot be updated.
    unknown = Resource()
    unknown['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()
    unknown['name'] = 'LALA'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()

    existing = Resource.read_from_hdx(resource_id)
    assert existing['id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'
    assert existing.get_file_type() == 'csv'

    existing.set_file_type('XLSX')
    existing['id'] = resource_id
    existing['name'] = 'MyResource1'
    existing.update_in_hdx()
    assert existing['id'] == resource_id
    assert existing['format'] == 'xlsx'
    assert existing.get_file_type() == 'xlsx'
    after_plain_update = (
        ('url_type', 'api'),
        ('resource_type', 'api'),
        ('url', source_url),
    )
    for key, expected in after_plain_update:
        assert existing[key] == expected

    upload_path = join('tests', 'fixtures', 'test_data.csv')
    existing.set_file_to_upload(upload_path)
    existing.update_in_hdx()
    after_upload = (
        ('url_type', 'upload'),
        ('resource_type', 'file.upload'),
        ('url', uploaded_url),
    )
    for key, expected in after_upload:
        assert existing[key] == expected

    # Updating must fail for an unknown id, a missing id and empty data.
    existing['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    del existing['id']
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    existing.data = {}
    with pytest.raises(HDXError):
        existing.update_in_hdx()

    # Work on a deep copy so the shared class-level data is not modified.
    metadata = copy.deepcopy(TestResource.resource_data)
    metadata['name'] = 'MyResource1'
    metadata['id'] = resource_id
    created = Resource(metadata)
    created.create_in_hdx()
    assert created['id'] == resource_id
    assert created.get_file_type() == 'xlsx'
def test_update_in_hdx(self, configuration, post_update):
    """Check update_in_hdx and create_in_hdx for the 'TEST1' resource.

    Covers: updating a resource with an unknown id raises HDXError; updating
    'TEST1' changes its format to 'CSV' and keeps it an 'api' resource;
    setting a file to upload turns it into an 'upload' resource with a
    download url; updating with an unknown or missing id raises HDXError;
    creating from a copy of ``TestResource.resource_data`` yields format
    'xlsx'.

    NOTE(review): ``configuration`` and ``post_update`` are presumably pytest
    fixtures mocking the remote calls - confirm against their definitions.
    """
    source_url = 'https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/master/tests/fixtures/test_data.csv'
    uploaded_url = 'http://test-data.humdata.org/dataset/6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d/resource/de6549d8-268b-4dfe-adaf-a4ae5c8510d5/download/test_data.csv'

    # A resource that only has an unknown id (and then a name) cannot be updated.
    unknown = Resource()
    unknown['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()
    unknown['name'] = 'LALA'
    with pytest.raises(HDXError):
        unknown.update_in_hdx()

    existing = Resource.read_from_hdx('TEST1')
    assert existing['id'] == 'de6549d8-268b-4dfe-adaf-a4ae5c8510d5'
    assert existing['format'] == 'XLSX'

    existing['format'] = 'CSV'
    existing['id'] = 'TEST1'
    existing['name'] = 'MyResource1'
    existing.update_in_hdx()
    after_plain_update = (
        ('id', 'TEST1'),
        ('format', 'CSV'),
        ('url_type', 'api'),
        ('resource_type', 'api'),
        ('url', source_url),
    )
    for key, expected in after_plain_update:
        assert existing[key] == expected

    existing.set_file_to_upload('fixtures/test_data.csv')
    existing.update_in_hdx()
    after_upload = (
        ('url_type', 'upload'),
        ('resource_type', 'file.upload'),
        ('url', uploaded_url),
    )
    for key, expected in after_upload:
        assert existing[key] == expected

    # Updating must fail once the id is unknown, and again once it is missing.
    existing['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        existing.update_in_hdx()
    del existing['id']
    with pytest.raises(HDXError):
        existing.update_in_hdx()

    # Work on a deep copy so the shared class-level data is not modified.
    metadata = copy.deepcopy(TestResource.resource_data)
    metadata['name'] = 'MyResource1'
    metadata['id'] = 'TEST1'
    created = Resource(metadata)
    created.create_in_hdx()
    assert created['id'] == 'TEST1'
    assert created['format'] == 'xlsx'
def hdx_acap_connector():
    """Connects to HDX, and fetches acaps covid 19 government measures dataset

    Creates a read-only configuration for the 'prod' HDX site, reads the
    'acaps-covid19-government-measures-dataset' dataset, logs metadata of its
    first resource, downloads that resource into the current directory and
    parses the 'Database' sheet of the downloaded workbook.

    NOTE(review): ``Configuration.create`` is called on every invocation;
    confirm that calling this function more than once per process is
    supported by the HDX library.

    Arguments:
        None

    Returns:
        pandas.DataFrame
    """
    setup_logging()
    Configuration.create(hdx_site='prod',
                         user_agent='CoronaWhy',
                         hdx_read_only=True)
    dataset = Dataset.read_from_hdx(
        'acaps-covid19-government-measures-dataset')
    logger.info("Dataset Fetched from: %s", dataset.get_hdx_url())
    logger.info('Expected Update Frequency: %s',
                dataset.get_expected_update_frequency())

    # Only the first resource of the dataset is inspected and downloaded.
    first_resource = dataset.get_resources()[0]
    logger.info('Description: %s', first_resource['description'])
    logger.info('Last Modified: %s, Revision Last Updated: %s',
                first_resource['last_modified'],
                first_resource['revision_last_updated'])
    # NOTE(review): assumes 'size' is a number of bytes - confirm.
    logger.info('Size: %sMb', first_resource['size'] / (1024**2))
    logger.info('Dataset Url: %s', first_resource['url'])
    logger.info('Tags: %s', dataset.get_tags())

    resource = Resource.read_from_hdx(first_resource['id'])
    _, absolute_path = resource.download('./')
    logger.info('Downloaded dataset at path: %s', absolute_path)

    # Use the workbook as a context manager so its file handle is closed once
    # the sheet has been parsed (it was previously left open).
    with pd.ExcelFile(absolute_path) as workbook:
        logger.info(workbook.sheet_names)
        df = workbook.parse('Database')
    return df
def test_delete_from_hdx(self, configuration, post_delete):
    """Check delete_from_hdx with and without an id on the resource.

    Deleting 'TEST1' must succeed; once the 'id' key has been removed from
    the resource, deleting again must raise HDXError.

    NOTE(review): ``post_delete`` is presumably a pytest fixture mocking the
    delete call - confirm against the fixture definition.
    """
    target = Resource.read_from_hdx(configuration, 'TEST1')
    target.delete_from_hdx()

    # Without an id the resource can no longer be deleted.
    del target['id']
    with pytest.raises(HDXError):
        target.delete_from_hdx()
def update_resources(resource_updates):
    """Upload a replacement file for every resource listed in the mapping.

    For each value of ``resource_updates`` the resource with the value's
    'id' is read from HDX, the value's 'path' is set as its file to upload
    and the resource is updated in HDX.

    Args:
        resource_updates: Mapping whose values are subscriptable with the
            keys 'id' and 'path'. The mapping's keys are not used.

    Returns:
        None
    """
    for update in resource_updates.values():
        target = Resource.read_from_hdx(update['id'])
        # NOTE(review): read_from_hdx may return None for an unknown id, in
        # which case the next line raises AttributeError - confirm callers
        # only pass known ids.
        target.set_file_to_upload(update['path'])
        target.update_in_hdx()
def test_delete_from_hdx(self, configuration, post_delete):
    """Check delete_from_hdx with and without an id on the resource.

    Deleting the resource ending in ...811 must succeed; once the 'id' key
    has been removed from the resource, deleting again must raise HDXError.

    NOTE(review): ``configuration`` and ``post_delete`` are presumably pytest
    fixtures mocking the remote calls - confirm against their definitions.
    """
    target = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046811')
    target.delete_from_hdx()

    # Without an id the resource can no longer be deleted.
    del target['id']
    with pytest.raises(HDXError):
        target.delete_from_hdx()
def test_datastore(self, configuration, post_datastore, topline_yaml, topline_json):
    """Exercise the datastore listing, create, update, query and delete calls.

    After every call that is expected to succeed, the test checks the marker
    held in ``TestResource.datastore`` ('create' or 'delete') and resets it to
    None so that the next call is verified independently.

    NOTE(review): the arguments are presumably pytest fixtures defined
    elsewhere, and ``TestResource.datastore`` is presumably written by the
    mock installed by ``post_datastore`` - confirm against the fixtures.
    """
    resource_ids = Resource.get_all_resource_ids_in_datastore()
    assert resource_ids == ['f9228459-d808-4b51-948f-68a5850abfde',
                            'af618a0b-09b8-42c8-836f-2be597e1ea34',
                            '748b40dd-7bd3-40a3-941b-e76f0bfbe0eb',
                            '91c78d24-eab3-40b5-ba91-6b29bcda7178',
                            '9320cfce-4620-489a-bcbe-25c73867d4fc',
                            'b9d2eb36-e65c-417a-bc28-f4dadb149302',
                            'ca6a0891-8395-4d58-9168-6c44e17e0193']

    resource = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046811')
    resource2 = Resource.read_from_hdx('74b74ae1-df0c-4716-829f-4f939a046815')

    # delete_first values 0, 1 and 2 are accepted ...
    for delete_first in (0, 1, 2):
        TestResource.datastore = None
        resource.create_datastore(delete_first=delete_first)
        assert TestResource.datastore == 'create'
    TestResource.datastore = None
    # ... while 3 is rejected.
    with pytest.raises(HDXError):
        resource.create_datastore(delete_first=3)

    resource.update_datastore()
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_for_topline()
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_from_dict_schema({
        "schema": [
            {
                "id": "code",
                "type": "text"
            },
        ],
        "primary_key": "code"
    })
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    resource.update_datastore_from_yaml_schema(topline_yaml)
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    filefordatastore = join('tests', 'fixtures', 'test_data.csv')
    resource.update_datastore_from_json_schema(topline_json, path=filefordatastore)
    assert TestResource.datastore == 'create'
    TestResource.datastore = None

    assert resource.has_datastore() is True
    assert TestResource.datastore == 'create'
    TestResource.datastore = None
    assert resource2.has_datastore() is False
    TestResource.datastore = None

    filefordatastore = join('tests', 'fixtures', 'datastore',
                            'ACLED-All-Africa-File_20170101-to-20170708.xlsx')
    resource.update_datastore(path=filefordatastore)
    assert TestResource.datastore == 'create'

    with pytest.raises(HDXError):
        resource2.update_datastore_from_json_schema(topline_json)

    resource.delete_datastore()
    assert TestResource.datastore == 'delete'
    TestResource.datastore = None

    # Removing the url is setup, not the behaviour under test: keep it outside
    # the raises block so that only create_datastore() is asserted to raise.
    del resource['url']
    with pytest.raises(HDXError):
        resource.create_datastore()

    # The zip fixture is only exercised on Python 3.
    if six.PY3:
        filefordatastore = join('tests', 'fixtures', 'test_data.zip')
        resource.update_datastore_from_json_schema(topline_json, path=filefordatastore)
        assert TestResource.datastore == 'create'
def test_delete_from_hdx(self, configuration, post_delete):
    """Check delete_from_hdx with and without an id on the resource.

    Deleting 'TEST1' must succeed; once the 'id' key has been removed from
    the resource, deleting again must raise HDXError.

    NOTE(review): ``configuration`` and ``post_delete`` are presumably pytest
    fixtures mocking the remote calls - confirm against their definitions.
    """
    target = Resource.read_from_hdx('TEST1')
    target.delete_from_hdx()

    # Without an id the resource can no longer be deleted.
    del target['id']
    with pytest.raises(HDXError):
        target.delete_from_hdx()