Example #1
 def test_search_in_hdx(self, configuration, search):
     datasets = Dataset.search_in_hdx(configuration, 'ACLED')
     assert len(datasets) == 10
     datasets = Dataset.search_in_hdx(configuration, 'ajyhgr')
     assert len(datasets) == 0
     with pytest.raises(HDXError):
         Dataset.search_in_hdx(configuration, '"')
Example #2
def compare_dates(csvname, urlend, filename, docname, keyword):
    dataset = Dataset.read_from_hdx(urlend)
    datasets = Dataset.search_in_hdx(docname, rows=10)
    resources = Dataset.get_all_resources(datasets)
    if d2 > d1:
        url, path = resources[0].download(
            '/Users/katherinenewcomb/Desktop/TestingRepo')
        print('Resource URL %s downloaded to %s' % (url, path))
        with open(
                '/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(csvname),
                "w+") as f:
            f.write(dataset.get_dataset_date())
        shutil.move(
            '/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(filename),
            '/Users/katherinenewcomb/Desktop/TestingRepo/ArchiveData/{}'.
            format(filename))
        newfile = glob.glob(
            '/Users/katherinenewcomb/Desktop/TestingRepo/*{}*'.format(keyword))
        os.rename(
            glob.glob(
                '/Users/katherinenewcomb/Desktop/TestingRepo/*{}*'.format(
                    'poverty-statistic'))[0],
            '/Users/katherinenewcomb/Desktop/TestingRepo/' + (filename))
        # print(newfile)
    else:
        newfile = "No new file"
        print(newfile)
        print('System Update Complete')
    return newfile
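compare_dates relies on the module-level datetimes d1 and d2 being set before it runs; d2 is populated by get_new_date (Example #20), while d1 is expected to hold the previously saved date. A minimal sketch of a hypothetical companion that fills d1 from the CSV written by get_resources (Example #32), assuming the same positional date slicing used in get_new_date:

import datetime

def get_old_date(csvname):
    # Hypothetical helper: read the date saved on the last run and expose it
    # as the global d1 that compare_dates checks against d2.
    global d1
    with open('/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(csvname)) as f:
        saved = f.read().strip()
    # Same slicing as get_new_date (Example #20): year, month, day
    d1 = datetime.datetime(int(saved[:4]), int(saved[5:7]), int(saved[8:10]))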
Example #3
def datasets(configuration):
    ds = list()
    dataset = Dataset({
        'title': 'UNHCR Refugee Population Statistics',
        'dataset_date': '12/31/2013'
    })
    ds.append(dataset)
    dataset = Dataset({
        'title': 'UNHCR Global Trends: Forced Displacement in 2016 Data',
        'dataset_date': '06/20/2017'
    })
    dataset.add_update_resource(
        {'url': join('tests', 'fixtures', 'UNHCR-14-wrd-tab-v3-external.xls')})
    ds.append(dataset)
    dataset = Dataset({
        'title': 'Global Forced Displacement Trends in 2014',
        'dataset_date': '06/19/2015'
    })
    ds.append(dataset)
    dataset = Dataset({
        'title': 'UNHCR Population of Concern from Colombia',
        'dataset_date': '01/01/1975-12/01/2012'
    })
    ds.append(dataset)
    return ds
Example #4
 def test_generate_resource_view(self):
     dataset = Dataset(TestAcledAfrica.dataset)
     resource = copy.deepcopy(TestAcledAfrica.resource)
     resource['id'] = '123'
     dataset.add_update_resource(resource)
     result = generate_resource_view(dataset)
     assert result == {'resource_id': '123', 'description': '', 'title': 'Quick Charts', 'view_type': 'hdx_hxl_preview',
                       'hxl_preview_config': '{"configVersion":4,"bites":[{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":null,"valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":null,"comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"key figure","errorMsg":null,"computedProperties":{"title":"Sum of fatalities","dataTitle":"fatalities","unit":"k"},"uiProperties":{"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Total Fatalities","postText":"deaths","numberFormat":"us","unit":"none"},"dataProperties":{},"displayCategory":"Key Figures","hashCode":-1955043658},{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":"#adm1+name","valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":null,"comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"chart","errorMsg":null,"computedProperties":{"pieChart":false,"title":"Sum of fatalities by admin1","dataTitle":"fatalities"},"uiProperties":{"swapAxis":true,"showGrid":false,"color":"#1ebfb3","sortingByValue1":"DESC","sortingByCategory1":null,"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Top 5 Regions for Fatalities","dataTitle":"deaths","limit":5},"dataProperties":{},"displayCategory":"Charts","hashCode":738289179},{"tempShowSaveCancelButtons":false,"ingredient":{"aggregateColumn":null,"valueColumn":"#affected+killed","aggregateFunction":"sum","dateColumn":"#date+occurred","comparisonValueColumn":null,"comparisonOperator":null,"filters":{},"description":""},"type":"timeseries","errorMsg":null,"computedProperties":{"pieChart":false,"filters":{"filterWith":[{"#affected+killed":"is not empty"}],"filterWithout":[]},"title":"Sum of fatalities by event_date","dataTitle":"fatalities"},"uiProperties":{"swapAxis":true,"showGrid":true,"color":"#0077ce","sortingByValue1":"DESC","sortingByCategory1":null,"showPoints":false,"internalColorPattern":["#1ebfb3","#0077ce","#f2645a","#9C27B0"],"title":"Fatalities over Time","dataTitle":"deaths"},"dataProperties":{},"displayCategory":"Timeseries","hashCode":2126517972}],"cookbookName":"generic"}'}
Example #5
def pop_data_download(region_names, wp_year=2017):
    
    from hdx.utilities.easy_logging import setup_logging
    setup_logging()
    from hdx.hdx_configuration import Configuration
    Configuration.create(hdx_site='prod', user_agent='Read-only user', hdx_read_only=True)
    from hdx.data.dataset import Dataset
    
    import wpgpDownload
    from wpgpDownload.utils.convenience_functions import download_country_covariates as download_worldpop
    from wpgpDownload.utils.convenience_functions import refresh_csv
    refresh_csv()

    hdx_datasets = Dataset.search_in_hdx('hrsl', rows=500)
    hdx_resources = Dataset.get_all_resources(hdx_datasets)
    
    print('')

    country_names = set([region[0:3] for region in region_names])

    for country in country_names:
        print(country)

        for res in hdx_resources:
            if 'population_'+country.lower() in res['name'] and '.zip' in res['name'] and 'csv' not in res['name']:
                print('Downloading HRSL',res['name'], end='\r')
                url, path = res.download()
                print('HRSL',res['name'],'download completed       ')
                shutil.move(Path(path),Path('./'+country+'/misc_data/population_'+country.lower()+'.zip'))
                zipfile.ZipFile(Path('./'+country+'/misc_data/population_'+country.lower()+'.zip'), 'r').extractall(Path('./'+country+'/misc_data'))
                for file in Path('./'+country+'/misc_data').iterdir():
                    if 'population_'+country.lower() in file.name and file.suffix != '.tif':
                        os.remove(file)
        
        # Normalise wp_year (a single year or a list of years) into a list
        years = wp_year if isinstance(wp_year, list) else [wp_year]

        #NTL_files = [file for file in Path("./"+country+"/NTL").iterdir() if "NTL" in file.name]
        #
        #years = []
        #for NTL_file in NTL_files:
        #    years.append(NTL_file.name[4:8])
        #years = [year for year in set(years)]
        #years.sort()

        for year in years:
            print('Downloading WorldPop '+country+' '+str(year)+'\t\t',end='\r')
            download_worldpop(ISO=country,out_folder='.\\'+country+'\\worldpop',prod_name='ppp_'+str(year))
            print('WorldPop '+country+' '+str(year)+' download completed\t\t')
        
        print("")
        
    print('Done')
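A hedged usage sketch for pop_data_download; the region names and years below are placeholders. The function takes the first three characters of each region name as the ISO-3 country code, downloads the matching HRSL population zip from HDX, and fetches WorldPop rasters for each requested year. It assumes a ./<ISO3>/misc_data folder already exists for each country:

# Placeholder region names (ISO-3 prefix) and years
pop_data_download(['KEN.1_1', 'KEN.2_1', 'UGA.3_1'], wp_year=[2016, 2017])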
Example #6
 def test_read_from_hdx(self, configuration, read):
     dataset = Dataset.read_from_hdx(configuration, 'TEST1')
     assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
     assert dataset['name'] == 'MyDataset1'
     assert dataset['dataset_date'] == '06/04/2016'
     assert len(dataset.resources) == 2
     assert len(dataset.gallery) == 1
     dataset = Dataset.read_from_hdx(configuration, 'TEST2')
     assert dataset is None
     dataset = Dataset.read_from_hdx(configuration, 'TEST3')
     assert dataset is None
Example #7
 def test_generate_resource_view(self):
     dataset = Dataset(TestDHS.dataset)
     resource = copy.deepcopy(TestDHS.resources[0])
     resource['id'] = '123'
     resource['url'] = 'https://test-data.humdata.org/dataset/495bf9ef-afab-41ac-a804-ca5978aa4213/resource/703d04ef-1787-44b1-92d5-c4ddd283d33f/download/dhs-quickstats_national_afg.csv'
     dataset.add_update_resource(resource)
     resource_view = generate_resource_view(dataset, bites_disabled=[True, True, True])
     assert resource_view is None
     resource_view = generate_resource_view(dataset, bites_disabled=[False, True, False])
     assert resource_view == {'resource_id': '123', 'description': '', 'title': 'Quick Charts', 'view_type': 'hdx_hxl_preview',
                              'hxl_preview_config': '{"configVersion": 5, "bites": [{"tempShowSaveCancelButtons": false, "ingredient": {"valueColumn": "#indicator+value+num", "aggregateFunction": "sum", "dateColumn": null, "comparisonValueColumn": null, "comparisonOperator": null, "filters": {"filterWith": [{"#date+year": "$MAX$"}, {"#indicator+code": "CM_ECMR_C_IMR"}, {"#indicator+label+code": "14003"}]}, "title": "Infant Mortality Rate", "description": "Rate is for the period of 10 years preceding the survey"}, "type": "key figure", "errorMsg": null, "computedProperties": {"explainedFiltersMap": {}, "pieChart": false, "dataTitle": "Value"}, "uiProperties": {"swapAxis": true, "showGrid": true, "color": "#1ebfb3", "sortingByValue1": "DESC", "sortingByCategory1": null, "internalColorPattern": ["#1ebfb3", "#0077ce", "#f2645a", "#9C27B0"], "dataTitle": "Percent", "postText": "percent"}, "dataProperties": {}, "displayCategory": "Charts", "hashCode": -487125335}, {"tempShowSaveCancelButtons": false, "ingredient": {"valueColumn": "#indicator+value+num", "aggregateFunction": "sum", "dateColumn": null, "comparisonValueColumn": null, "comparisonOperator": null, "filters": {"filterWith": [{"#date+year": "$MAX$"}, {"#indicator+code": "ED_LITR_W_LIT"}]}, "title": "Women who are Literate", "description": ""}, "type": "key figure", "errorMsg": null, "computedProperties": {"explainedFiltersMap": {}, "pieChart": false, "dataTitle": "Value"}, "uiProperties": {"swapAxis": true, "showGrid": true, "color": "#1ebfb3", "sortingByValue1": "ASC", "sortingByCategory1": null, "internalColorPattern": ["#1ebfb3", "#0077ce", "#f2645a", "#9C27B0"], "dataTitle": "Percent", "postText": "percent"}, "dataProperties": {}, "displayCategory": "Charts", "hashCode": -539301812}], "recipeUrl": "https://raw.githubusercontent.com/mcarans/hxl-recipes/dev/recipes/dhs/recipe.json"}'}
Example #8
 def test_read_from_hdx(self, configuration, read):
     dataset = Dataset.read_from_hdx(configuration, 'TEST1')
     assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
     assert dataset['name'] == 'MyDataset1'
     assert dataset['dataset_date'] == '06/04/2016'
     assert len(dataset.resources) == 2
     assert len(dataset.gallery) == 1
     dataset = Dataset.read_from_hdx(configuration, 'TEST2')
     assert dataset is None
     dataset = Dataset.read_from_hdx(configuration, 'TEST3')
     assert dataset is None
Example #9
 def construct_dataset(origdata,
                       origresources,
                       maintainer=None,
                       orgid=None,
                       organization=None):
     dataset = Dataset(copy.deepcopy(origdata))
     if maintainer:
         dataset['maintainer'] = maintainer
     if orgid:
         dataset['owner_org'] = orgid
     if organization:
         dataset['organization'] = organization
     dataset.add_update_resources(copy.deepcopy(origresources))
     return dataset
Example #10
    def datasets(self):  # noqa
        if self._datasets:
            return self._datasets

        self._datasets = {}
        for theme in self._feature_selection.themes:
            dataset = Dataset()
            name = '{}_{}'.format(self._dataset_prefix, theme)
            title = '{} {} (OpenStreetMap Export)'.format(self._name, theme)
            tags = []
            caveats = ''
            if 'hdx' in self._feature_selection.doc[theme]:
                hdx = self._feature_selection.doc[theme]['hdx']
                title = hdx.get('name') or title
                caveats = hdx.get('caveats', caveats)

                if 'tags' in hdx:
                    tags = map(lambda tag: tag.strip(), hdx['tags'].split(','))

            dataset['name'] = name
            dataset['title'] = title
            dataset['caveats'] = caveats
            dataset['private'] = self.is_private
            dataset['notes'] = self.hdx_note(theme)
            dataset['dataset_source'] = 'OpenStreetMap contributors'
            dataset.set_dataset_date_from_datetime(self._dataset_date)
            dataset['owner_org'] = '225b9f7d-e7cb-4156-96a6-44c9c58d31e3'
            dataset['license_id'] = self._license
            dataset['methodology'] = 'Other'
            dataset['methodology_other'] = 'Volunteered geographic information'
            dataset['data_update_frequency'] = str(self._data_update_frequency)
            dataset['subnational'] = str(int(self.subnational))
            dataset['groups'] = []

            # warning: this makes a network call
            for x in self._locations:
                dataset.add_other_location(x)
            dataset.add_tags(tags)

            ga = GalleryItem({
                'title':
                'OSM Analytics',
                'description':
                'View detailed information about OpenStreetMap edit history in this area.',
                'url':
                self.osm_analytics_url,
                'image_url':
                'http://{}/static/ui/images/osm_analytics.png'.format(
                    self.hostname),
                'type':
                'Visualization',
            })
            dataset.add_update_galleryitem(ga)

            self._datasets[theme] = dataset
        return self._datasets
Example #11
 def test_add_update_delete_resources(self, configuration, post_delete):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     resources_data = copy.deepcopy(TestDataset.resources_data)
     dataset = Dataset(dataset_data)
     dataset.add_update_resources(resources_data)
     assert len(dataset.resources) == 2
     dataset.delete_resource('NOTEXIST')
     assert len(dataset.resources) == 2
     dataset.delete_resource('de6549d8-268b-4dfe-adaf-a4ae5c8510d5')
     assert len(dataset.resources) == 1
     resources_data = copy.deepcopy(TestDataset.resources_data)
     resource = Resource(resources_data[0])
     resource.set_file_to_upload('lala')
     dataset.add_update_resource(resource)
     assert dataset.resources[1].get_file_to_upload() == 'lala'
Example #12
def get_dataset_from_hdx(hdx_address: str, dataset_name: str,
                         output_filename: str):
    """
    Use the HDX API to download a dataset based on the address and resource name
    :param hdx_address: The HDX address of the dataset
    :param dataset_name: The name of the resource within the dataset
    :param output_filename: The desired full filepath of the downloaded file
    """
    HDX_SITE = 'prod'
    USER_AGENT = 'MapAction'

    Configuration.create(hdx_site=HDX_SITE,
                         user_agent=USER_AGENT,
                         hdx_read_only=True)
    logger = logging.getLogger(__name__)

    # TODO: make more generic caching ability
    # file_age_days = utils.get_file_age_days(save_filepath)
    # if 0 < file_age_days < cache_days:
    #     return save_filepath
    logger.info(f'Querying HDX API for dataset {hdx_address}')
    resources = Dataset.read_from_hdx(hdx_address).get_resources()
    for resource in resources:
        if resource['name'] == dataset_name:
            _, download_filepath = resource.download()
            copy_file(source_path=download_filepath,
                      target_path=output_filename)
            save_file(output_filename)
            logger.info(f'Saved to {output_filename}')
            return output_filename
    raise HDXDatasetNotFound(
        f'HDX dataset with address "{hdx_address}" and name "{dataset_name}" not found'
    )
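A minimal usage sketch, assuming the surrounding module supplies copy_file, save_file and HDXDatasetNotFound; the HDX address and resource name below are placeholders:

# Placeholder address and resource name
local_path = get_dataset_from_hdx(
    hdx_address='movement-range-maps',
    dataset_name='movement-range-data.zip',
    output_filename='movement-range-data.zip')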
Example #13
 def test_get_add_location(self, configuration, read):
     dataset = Dataset.read_from_hdx(configuration, 'TEST1')
     assert dataset['groups'] == resultgroups
     assert dataset.get_location() == ['Algeria', 'Zimbabwe']
     dataset.add_country_location('sdn')
     expected = copy.deepcopy(resultgroups)
     expected.append({'name': 'sdn'})
     assert dataset['groups'] == expected
     assert dataset.get_location() == ['Algeria', 'Zimbabwe', 'Sudan']
     dataset.add_country_location('dza')
     assert dataset['groups'] == expected
     assert dataset.get_location() == ['Algeria', 'Zimbabwe', 'Sudan']
     dataset.add_country_locations(['KEN', 'moz', 'ken'])
     expected.extend([{'name': 'ken'}, {'name': 'moz'}])
     assert dataset['groups'] == expected
     assert dataset.get_location() == [
         'Algeria', 'Zimbabwe', 'Sudan', 'Kenya', 'Mozambique'
     ]
     dataset.add_continent_location('af')
     assert len(dataset['groups']) == 58
     assert len(dataset.get_location()) == 58
     del dataset['groups']
     assert dataset.get_location() == []
     with pytest.raises(HDXError):
         dataset.add_country_location('lala')
     dataset.add_country_location('ukr')
     assert dataset['groups'] == [{'name': 'ukr'}]
     assert dataset.get_location() == ['Ukraine']
Example #14
    def Download2Comp(self, keyword, fformat, dest):
        """
            Checking the metadata of the datasets
            returned to see if they are the data
            that we desire, by checking against
            two keywords
        """
        # Get the data information attached to each dataset
        self.resources = Dataset.get_all_resources(self.valid_datasets)

        # Getting the relevant data attached to each dataset
        get_data = ''
        for i in range(len(self.valid_datasets)):
            # Check all data attached to each dataset
            all_data = self.valid_datasets[i].get_resources()

            for j in range(len(all_data)):
                # Take data if it matches the keyword and format desired
                if keyword in all_data[j]['name'] and fformat in all_data[j][
                        'format']:
                    get_data = self.valid_datasets[i].get_resource(index=j)
                    try:
                        # Download it
                        get_data['format'] = ''
                        url, path = get_data.download(folder=dest)
                        print('Resource URL %s downloaded to %s' % (url, path))
                        get_data = ''  # Clear variable to avoid duplicate downloads in the event of failure

                    except Exception:
                        print('Data not valid for download.')
Example #15
def read_hdx_metadata(datasetinfo, today=None):
    # type: (Dict, Optional[datetime]) -> None
    """Read metadata from HDX dataset and add to input dictionary

    Args:
        datasetinfo (Dict): Dictionary of information about dataset
        today (Optional[datetime]): Value to use for today. Defaults to None (datetime.now()).

    Returns:
        None
    """
    dataset_name = datasetinfo['dataset']
    dataset = Dataset.read_from_hdx(dataset_name)
    url = datasetinfo.get('url')
    if not url:
        resource_name = datasetinfo.get('resource')
        format = datasetinfo['format']
        for resource in dataset.get_resources():
            if resource['format'] == format.upper():
                if resource_name and resource['name'] != resource_name:
                    continue
                url = resource['url']
                break
        if not url:
            raise ValueError('Cannot find %s resource in %s!' %
                             (format, dataset_name))
        datasetinfo['url'] = url
    if 'date' not in datasetinfo:
        datasetinfo['date'] = get_date_from_dataset_date(dataset, today=today)
    if 'source' not in datasetinfo:
        datasetinfo['source'] = dataset['dataset_source']
    if 'source_url' not in datasetinfo:
        datasetinfo['source_url'] = dataset.get_hdx_url()
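A sketch of the kind of datasetinfo dictionary read_hdx_metadata expects; the dataset name below is a placeholder. When 'url', 'date', 'source' or 'source_url' are missing, they are filled in from the HDX dataset:

datasetinfo = {
    'dataset': 'movement-range-maps',  # placeholder HDX dataset name
    'format': 'csv',                   # match the first CSV resource
}
read_hdx_metadata(datasetinfo)
# datasetinfo now also carries 'url', 'date', 'source' and 'source_url'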
Example #16
def main():
    """Generate dataset and create it in HDX"""
    for dataset in Dataset.get_all_datasets(
            check_duplicates=False
    ):  # [Dataset.read_from_hdx('malawi-other')]:
        changed, error = dataset.clean_dataset_tags()

        if changed and not error:
            if real_run:
                try:
                    logger.info('%s: Updating dataset in HDX' %
                                dataset['name'])
                    dataset['batch_mode'] = 'KEEP_OLD'
                    dataset['skip_validation'] = True
                    dataset.update_in_hdx(update_resources=False,
                                          hxl_update=False)
                except HDXError as ex:
                    logger.exception(ex)
            if not dataset.get_tags():
                if dataset['private']:
                    privatepublic = 'private'
                else:
                    privatepublic = 'public'
                logger.warning('%s (%s) has no tags!' %
                               (dataset['name'], privatepublic))
Example #17
def delete_package_from_hdx(context, data_dict):

    check_access('package_update', context)

    id = data_dict.get('id')
    if not id:
        raise ValidationError('Dataset id is missing!')

    try:
        data = logic.get_action('package_show')({
            'ignore_auth': True
        }, {
            'id': id
        })

        hdx_dataset = Dataset.read_from_hdx(data['name'])
        if hdx_dataset:
            hdx_dataset.delete_from_hdx()
            data['hdx_name'] = ""
            try:
                toolkit.get_action('package_update')(context, data)
            except ValidationError as e:
                try:
                    raise ValidationError(e.error_dict)
                except (KeyError, IndexError):
                    raise ValidationError(e.error_dict)

            return
        return "Dataset not found!"
    except Exception as e:
        log.debug(e)
        return "Please try again!"
Example #18
 def test_add_update_delete_gallery(self, configuration, post_delete):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     gallery_data = copy.deepcopy(TestDataset.gallery_data)
     dataset = Dataset(configuration, dataset_data)
     dataset.add_update_gallery(gallery_data)
     assert len(dataset.gallery) == 1
     dataset.delete_galleryitem('NOTEXIST')
     dataset.delete_galleryitem('d59a01d8-e52b-4337-bcda-fceb1d059bef')
     assert len(dataset.gallery) == 0
Example #19
def generate_dataset(configuration, countryName):
    #showedName = countryName
    if (countryName == "Ivory Coast"):
        showedName = "Cote d'Ivoire"
    name = countryName + '-healthsites'
    title = countryName + '-healthsites'
    slugified_name = slugify(name).lower()
    # dataset = Dataset(configuration, {
    # })
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    # dataset['name'] = slugified_name
    # dataset['title'] = title
    #generating the datasets
    getCountryHealthSites(configuration, countryName)
    # geojson resource
    if (os.path.isfile(configuration.read()['data_folder'] + countryName +
                       '.geojson')):
        rName = countryName + '-healthsites-geojson'
        geojsonResource = Resource()
        geojsonResource['name'] = rName
        geojsonResource['format'] = 'geojson'
        geojsonResource['url'] = configuration.read()['base_url']
        geojsonResource['description'] = countryName + ' healthsites geojson'
        geojsonResource.set_file_to_upload(
            configuration.read()['data_folder'] + countryName + '.geojson')

        geojsonResource.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(geojsonResource)
    #csv resource
    if (os.path.isfile(configuration.read()['data_folder'] + countryName +
                       '.csv')):
        resource_csv = Resource()
        resource_csv['name'] = countryName + '-healthsites-csv'
        resource_csv['description'] = countryName + ' healthsites csv'
        resource_csv['format'] = 'csv'
        resource_csv.set_file_to_upload(configuration.read()['data_folder'] +
                                        countryName + '.csv')

        resource_csv.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource_csv)
    # shp resource
    if (os.path.isfile(configuration.read()['data_folder'] + countryName +
                       "-shapefiles.zip")):
        resource_shp = Resource()
        resource_shp['name'] = countryName + '-healthsites-shp'
        resource_shp['format'] = 'zipped shapefile'
        resource_shp['description'] = countryName + ' healthsites shapefiles'
        resource_shp.set_file_to_upload(configuration.read()['data_folder'] +
                                        countryName + "-shapefiles.zip")

        resource_shp.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource_shp)

    return dataset
Example #20
def get_new_date(urlend, docname):
    # Gets specific url for indicated category
    Configuration.create(hdx_site='prod',
                         user_agent='A_Quick_Example',
                         hdx_read_only=True)
    dataset = Dataset.read_from_hdx(urlend)
    datasets = Dataset.search_in_hdx(docname, rows=10)
    resources = Dataset.get_all_resources(datasets)
    # Creates variable for most updated version of dataset date
    y = dataset.get_dataset_date()
    # Gets year, month, and day of dataset
    year1 = y[:4]
    month1 = y[5:7]
    day1 = y[8:10]
    # Organizes dataset date into datetime format
    global d2
    d2 = datetime.datetime(int(year1), int(month1), int(day1))
Example #21
 def test_add_update_delete_resources(self, configuration, post_delete):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     resources_data = copy.deepcopy(TestDataset.resources_data)
     dataset = Dataset(configuration, dataset_data)
     dataset.add_update_resources(resources_data)
     assert len(dataset.resources) == 2
     dataset.delete_resource('NOTEXIST')
     assert len(dataset.resources) == 2
     dataset.delete_resource('de6549d8-268b-4dfe-adaf-a4ae5c8510d5')
     assert len(dataset.resources) == 1
Example #22
 def get_url(self):
     Configuration.create(hdx_site='prod',
                          user_agent='A_Quick_Example',
                          hdx_read_only=True)
     dataset = Dataset.read_from_hdx('movement-range-maps')
     resources = dataset.get_resources()
     dic = resources[1]
     self.url = dic['download_url']
     return self
Example #23
def get_date_from_dataset_date(dataset):
    if isinstance(dataset, str):
        dataset = Dataset.read_from_hdx(dataset)
    date_type = dataset.get_dataset_date_type()
    if date_type == 'range':
        return dataset.get_dataset_end_date(date_format='%Y-%m-%d')
    elif date_type == 'date':
        return dataset.get_dataset_date(date_format='%Y-%m-%d')
    return None
Example #24
def main():
    conf = Configuration()

    countries = {
        'Benin': "BEN",
        #              'Burkina Faso': "BFA",
        #             'Burundi': "BDI",
        #             'Congo': "COG",
        #              'Ivory Coast': "CIV",
        'Ghana': "GHA"
        #              'Guinea': "GIN",
        #              'Guinea-bissau': "GNB",
        #              'Gambia': "GMB",
        #              'Liberia': "LBR",
        #              'Mali': "MLI",
        #              'Mauritania': "MRT",
        #             'Malawi':"MWI",
        #             'Marocco': "MAR",
        #              'Niger': "NER",
        #              'Nigeria': "NGA",
        #              'Senegal':"SEN",
        #              'Sierra Leone': "SLE",
        #              'Togo': "TGO",
        #              'Cameroon': "CMR",
        #              'Central African Republic':"CAR",
        #              'Tanzania':"TZA",
        #              'Rwanda': "RWA",
        #              'Somalia': "SOM",
        #              'South Sudan': "SSD",
        #              'Yemen': "YEM",
        #              'Democratic Republic of The Congo': "COD",
        #              'Uganda': "UGA",
        #              'Zambia': "ZMB",
        #              'Angola': "AGO",
        #              'Kenya': "KEN",
        #              'Ethiopia': "ETH"
        #              'Algeria': "DZA",
        #              'Egypt': "EGY",
        #             'Tunisia':"TUN"
        #            'Haiti': "HTI",
        #            'Libya': "LBY",
        #            'Sudan': "SDN",
        #            'Bangladesh': "BGD",
        #            'Djibouti': "DJI",
        #            'Gabon': "GAB",
        #            'Madagascar': "MDG",
        #            'Lesotho': "LSO",
        #            'Namibia': "NAM",
        #            'Zimbabwe': "ZWE",
        #            'Mozambique': "MOZ",
        #            'Botswana': "BWA"
        #            'Palestine': "PSE",
        #            'Mauritius' : "MUS",
        #            'Zambia' : "ZMB"
    }

    dataset = Dataset.read_from_hdx('zimbabwe-healthsites')
Example #25
def sync_datasets(datasets, update_dataset_date=False):
    for dataset in datasets:
        exists = Dataset.read_from_hdx(dataset['name'])
        if exists:
            if update_dataset_date:
                dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.update_in_hdx()
        else:
            dataset.set_dataset_date_from_datetime(datetime.now())
            dataset.create_in_hdx(allow_no_resources=True)
Example #26
def updateTag(iso2):
    #https://data.humdata.org/dataset/unhabitat-zw-indicators
    iso = iso2.lower()
    url = "unhabitat-%s-indicators" % iso
    print(url)

    dataset = Dataset.read_from_hdx("unhabitat-%s-indicators" % iso)
    # print(dataset)
    dataset.add_tag('INDICATORS')
    dataset.update_in_hdx()
Example #27
 def sync_datasets(self, update_dataset_date=False):  # noqa
     for dataset in self.datasets.values():
         exists = Dataset.read_from_hdx(dataset['name'])
         if exists:
             if update_dataset_date:
                 dataset.set_dataset_date_from_datetime(datetime.now())
             dataset.update_in_hdx()
         else:
             dataset.set_dataset_date_from_datetime(datetime.now())
             dataset.create_in_hdx(allow_no_resources=True)
Example #28
def check_type(dataset, file_types=[]):
    temp_dataset = Dataset.read_from_hdx(dataset)
    temp_dataset.separate_resources()
    if (len(temp_dataset.resources) > 0):
        if (len(file_types) > 0):
            if (not set(temp_dataset.get_filetypes()).isdisjoint(file_types)):
                return True
        else:
            return True
    return False
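A brief usage sketch for check_type; the dataset name and formats are placeholders. It returns True when the dataset has at least one resource and, if file_types is given, at least one resource format in that list:

# Placeholder dataset name and formats (as returned by get_filetypes())
if check_type('zimbabwe-healthsites', file_types=['csv', 'geojson']):
    print('Dataset has a resource in a requested format')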
Example #29
 def test_add_update_delete_gallery(self, configuration, post_delete):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     gallery_data = copy.deepcopy(TestDataset.gallery_data)
     dataset = Dataset(configuration, dataset_data)
     dataset.add_update_gallery(gallery_data)
     assert len(dataset.gallery) == 1
     dataset.delete_galleryitem('NOTEXIST')
     dataset.delete_galleryitem('d59a01d8-e52b-4337-bcda-fceb1d059bef')
     assert len(dataset.gallery) == 0
Example #30
 def test_update_yaml(self, configuration, static_yaml):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     dataset = Dataset(configuration, dataset_data)
     assert dataset['name'] == 'MyDataset1'
     assert dataset['author'] == 'AN Other'
     dataset.update_yaml(static_yaml)
     assert dataset['name'] == 'MyDataset1'
     assert dataset['author'] == 'acled'
     assert dataset.get_resources() == [{"id": "ABC", "description": "Resource1",
                                         "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "name": "Resource1",
                                         "url": "http://resource1.xlsx",
                                         "format": "xlsx"},
                                        {"id": "DEF", "description": "Resource2",
                                         "package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "name": "Resource2",
                                         "url": "http://resource2.csv",
                                         "format": "csv"}]
     assert dataset.get_gallery() == [{'image_url': 'http://docs.hdx.rwlabs.org/wp-content/uploads/acled_visual.png',
                                       'url': 'http://www.acleddata.com/visuals/maps/dynamic-maps/',
                                       'type': 'visualization', 'title': 'Dynamic Map: Political Conflict in Africa',
                                       'description': 'ACLED maps'}]
Example #31
    def process_mobility(self):

        print("Processing Mobility indices data ...")
        Configuration.create(hdx_site='prod',
                            user_agent='A_Quick_Example',
                            hdx_read_only=True)
        dataset = Dataset.read_from_hdx('movement-range-maps')
        resources = dataset.get_resources()
        dic = resources[1]
        url_mobility = dic['download_url']

        self.file_mobility = "/home/ludo915/code/covsco/data/train/mobility/fr/mvt_range.zip"
        download_url(url_mobility, self.file_mobility)

        with ZipFile(self.file_mobility, 'r',) as zipf:
            zipf.printdir()
            print('Extracting mv_range file now...')
            mvt_range = zipf.namelist()[-1]
            zipf.extract(mvt_range,"/home/ludo915/code/covsco/data/train/mobility/fr/")
            print('Done!')

        os.chdir("/home/ludo915/code/covsco/data/train/mobility/fr/")
        os.system("""grep "FRA" """+ mvt_range + """ > mouvement-range-FRA.txt""")
        os.system("""head -n 1 """+ mvt_range + """ > header.txt""")
        os.system("""cat header.txt mouvement-range-FRA.txt > mouvement-range-FRA-final.csv""")
        os.chdir("/home/ludo915/code/covsco/scripts")
        self.df = pd.read_csv("/home/ludo915/code/covsco/data/train/mobility/fr/mouvement-range-FRA-final.csv", sep = '\t')
        print(self.df)
        self.df["ds"]=pd.to_datetime(self.df["ds"], dayfirst = True)
        self.df['polygon_name'] = self.df['polygon_name'].replace(
            {'Ile-de-France': 'Île-de-France',\
            '-le-de-France': 'Île-de-France',\
            "Auvergne-Rh-ne-Alpes":"Auvergne-Rhône-Alpes",\
            "Bourgogne-Franche-Comt-":"Bourgogne-Franche-Comté",\
            "Provence-Alpes-C-te d'Azur":"Provence-Alpes-Côte d'Azur"})

        self.df2 = pd.read_csv('/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv')
        self.df2["date"]=pd.to_datetime(self.df2["date"])
        self.df3 = pd.read_csv("/home/ludo915/code/covsco/data/train/pop/fr/regions_departements.csv", sep = ";")

        self.df.reset_index(inplace=  True)
        self.df2.reset_index(inplace = True)
        self.df3.reset_index(inplace = True)
        self.df.drop(columns = ["index"],inplace = True)
        self.df2.drop(columns = ["index"],inplace = True)
        self.df3.drop(columns = ["index"],inplace = True)

        self.df2 = self.df2.merge(self.df3, how='inner', left_on = "numero", right_on = "depnum",suffixes=("","_y"))
        self.df2 = self.df2.merge(self.df, how ="outer", left_on = ["Region","date"], right_on = ["polygon_name","ds"],suffixes=("","_y")).dropna()
        print(self.df2)
        self.df2.to_csv("/home/ludo915/code/covsco/data/train/all_data_merged/fr/Enriched_Covid_history_data.csv", index = False)
        print('OK')

        return None
Example #32
def get_resources(url_end, csv_filename, docname, keyword):
    Configuration.create(hdx_site='prod',
                         user_agent='A_Quick_Example',
                         hdx_read_only=True)
    # Gets web url
    dataset = Dataset.read_from_hdx(url_end)
    # Writes Dataset Date in dependencydate csv
    f = open(
        '/Users/katherinenewcomb/Desktop/TestingRepo/{}'.format(csv_filename),
        "w+")
    f.write(dataset.get_dataset_date())
    # Searches for specific file on web url
    datasets = Dataset.search_in_hdx(docname, rows=10)
    # Grabs resources from file
    global resources
    resources = Dataset.get_all_resources(datasets)
    # Only uncomment if you want to download file!!
    url, path = resources[0].download(
        '/Users/katherinenewcomb/Desktop/TestingRepo')
    print('Resource URL %s downloaded to %s' % (url, path))
Example #33
 def test_add_update_delete_resources(self, configuration, post_delete):
     dataset_data = copy.deepcopy(TestDataset.dataset_data)
     resources_data = copy.deepcopy(TestDataset.resources_data)
     dataset = Dataset(configuration, dataset_data)
     dataset.add_update_resources(resources_data)
     assert len(dataset.resources) == 2
     dataset.delete_resource('NOTEXIST')
     assert len(dataset.resources) == 2
     dataset.delete_resource('de6549d8-268b-4dfe-adaf-a4ae5c8510d5')
     assert len(dataset.resources) == 1
Example #34
 def sync_datasets(self):  # noqa
     for dataset in self.datasets.values():
         try:
             exists = Dataset.read_from_hdx(dataset['name'])
             if exists:
                 dataset.update_in_hdx()
             else:
                 dataset.create_in_hdx()
         except Exception as e:
             client.captureException()
             LOG.warn(e)
             LOG.warn(traceback.format_exc())
Example #35
def generate_dataset(configuration):

    url = configuration['base_url'] + configuration['api']
    loaData.writeData(url)

    name = 'Africa health facilities'
    title = 'Africa health facilities data'
    slugified_name = slugify(name).lower()
    dataset = Dataset(configuration, {})
    dataset['name'] = slugified_name
    dataset['title'] = title
    date = time.strftime("%d/%m/%Y")
    dataset['dataset_date'] = date
    dataset.add_continent_location('AF')

    rName = "sen-healthfacilities"
    resource = Resource()
    resource['name'] = rName
    resource['format'] = 'geojson'
    resource['url'] = url
    resource['description'] = configuration['base_url']
    resource['url_type'] = 'api'
    resource['resource_type'] = 'api'
    resource.set_file_to_upload(configuration['data_folder'] +
                                'sen-healthfacilities.geojson')

    dataset.add_update_resource(resource)

    return dataset
Example #36
    def test_create_in_hdx(self, configuration, post_create):
        dataset = Dataset(configuration)
        with pytest.raises(HDXError):
            dataset.create_in_hdx()
        dataset['id'] = 'TEST1'
        dataset['name'] = 'LALA'
        with pytest.raises(HDXError):
            dataset.create_in_hdx()

        dataset_data = copy.deepcopy(TestDataset.dataset_data)
        dataset = Dataset(configuration, dataset_data)
        dataset.create_in_hdx()
        assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
        assert len(dataset.resources) == 2
        assert len(dataset.gallery) == 0

        dataset_data['name'] = 'MyDataset2'
        dataset = Dataset(configuration, dataset_data)
        with pytest.raises(HDXError):
            dataset.create_in_hdx()

        dataset_data['name'] = 'MyDataset3'
        dataset = Dataset(configuration, dataset_data)
        with pytest.raises(HDXError):
            dataset.create_in_hdx()

        dataset_data = copy.deepcopy(TestDataset.dataset_data)
        gallery_data = copy.deepcopy(TestDataset.gallery_data)
        dataset_data["gallery"] = gallery_data
        with pytest.raises(HDXError):
            dataset = Dataset(configuration, dataset_data)
        del dataset_data["gallery"]
        dataset = Dataset(configuration, dataset_data)
        dataset.add_update_gallery(gallery_data)
        dataset.create_in_hdx()
        assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
        assert len(dataset.resources) == 2
        assert len(dataset.gallery) == 1
Example #37
def generate_dataset_and_showcase(acled_url, hxlproxy_url, downloader, countrydata):
    """
      Create HXLated URLs to ACLED API
      eg. https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&url=https%3A//api.acleddata.com/acled/read.csv%3Flimit%3D0%26iso%3D120&tagger-match-all=on&tagger-02-header=iso&tagger-02-tag=%23country%2Bcode&tagger-03-header=event_id_cnty&tagger-03-tag=%23event%2Bcode&tagger-05-header=event_date&tagger-05-tag=%23date%2Boccurred+&tagger-08-header=event_type&tagger-08-tag=%23event%2Btype&tagger-09-header=actor1&tagger-09-tag=%23group%2Bname%2Bfirst&tagger-10-header=assoc_actor_1&tagger-10-tag=%23group%2Bname%2Bfirst%2Bassoc&tagger-12-header=actor2&tagger-12-tag=%23group%2Bname%2Bsecond&tagger-13-header=assoc_actor_2&tagger-13-tag=%23group%2Bname%2Bsecond%2Bassoc&tagger-16-header=region&tagger-16-tag=%23region%2Bname&tagger-17-header=country&tagger-17-tag=%23country%2Bname&tagger-18-header=admin1&tagger-18-tag=%23adm1%2Bname&tagger-19-header=admin2&tagger-19-tag=%23adm2%2Bname&tagger-20-header=admin3&tagger-20-tag=%23adm3%2Bname&tagger-21-header=location&tagger-21-tag=%23loc%2Bname&tagger-22-header=latitude&tagger-22-tag=%23geo%2Blat&tagger-23-header=longitude&tagger-23-tag=%23geo%2Blon&tagger-25-header=source&tagger-25-tag=%23meta%2Bsource&tagger-27-header=notes&tagger-27-tag=%23description&tagger-28-header=fatalities&tagger-28-tag=%23affected%2Bkilled&header-row=1
    """
    countryname = countrydata['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = countrydata['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Live')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['HXL', 'conflicts', 'political violence', 'protests']
    dataset.add_tags(tags)

    acled_country_url = '%siso=%d' % (acled_url, countrydata['m49'])
    url = '%surl=%s%s' % (hxlproxy_url, quote_plus(acled_country_url), hxlate)
    earliest_year = 10000
    latest_year = 0
    for row in downloader.get_tabular_rows(acled_country_url, dict_rows=True, headers=1):
        year = int(row['year'])
        if year < earliest_year:
            earliest_year = year
        if year > latest_year:
            latest_year = year

    if latest_year == 0:
        logger.warning('%s has no data!' % countryname)
        return None, None

    resource = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags',
        'format': 'csv',
        'url': url
    }
    dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % countrydata['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % countrydata['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % countrydata['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
Example #38
    def test_update_in_hdx(self, configuration, post_update):
        dataset = Dataset(configuration)
        dataset['id'] = 'NOTEXIST'
        with pytest.raises(HDXError):
            dataset.update_in_hdx()
        dataset['name'] = 'LALA'
        with pytest.raises(HDXError):
            dataset.update_in_hdx()

        dataset = Dataset.read_from_hdx(configuration, 'TEST1')
        assert dataset['id'] == '6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d'
        assert dataset['dataset_date'] == '06/04/2016'

        dataset['dataset_date'] = '02/26/2016'
        dataset['id'] = 'TEST1'
        dataset['name'] = 'MyDataset1'
        dataset.update_in_hdx()
        assert dataset['id'] == 'TEST1'
        assert dataset['dataset_date'] == '02/26/2016'

        dataset['id'] = 'NOTEXIST'
        with pytest.raises(HDXError):
            dataset.update_in_hdx()

        del dataset['id']
        with pytest.raises(HDXError):
            dataset.update_in_hdx()

        dataset_data = copy.deepcopy(TestDataset.dataset_data)
        gallery_data = copy.deepcopy(TestDataset.gallery_data)
        dataset_data['name'] = 'MyDataset1'
        dataset_data['id'] = 'TEST1'
        dataset = Dataset(configuration, dataset_data)
        dataset.add_update_gallery(gallery_data)
        dataset.create_in_hdx()
        assert dataset['id'] == 'TEST1'
        assert dataset['dataset_date'] == '03/23/2016'
        assert len(dataset.resources) == 2
        assert len(dataset.gallery) == 1
        dataset.update_in_hdx()
        assert len(dataset.resources) == 2
        assert len(dataset.gallery) == 1
Example #39
 def test_delete_from_hdx(self, configuration, post_delete):
     dataset = Dataset.read_from_hdx(configuration, 'TEST1')
     dataset.delete_from_hdx()
     del dataset['id']
     with pytest.raises(HDXError):
         dataset.delete_from_hdx()