def test_read_from_hdx(self, configuration, read):
    showcase = Showcase.read_from_hdx('TEST1')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase = Showcase.read_from_hdx('TEST2')
    assert showcase is None
    showcase = Showcase.read_from_hdx('TEST3')
    assert showcase is None
def test_update_json(self, configuration, static_json):
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    assert showcase['title'] == 'MyShowcase1'
    assert showcase['name'] == 'showcase-1'
    showcase.update_from_json(static_json)
    assert showcase['title'] == 'MyShowcase1'
    assert showcase['name'] == 'new-showcase-1'
def test_read_from_hdx(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase = Showcase.read_from_hdx('TEST2')
    assert showcase is None
    showcase = Showcase.read_from_hdx('TEST3')
    assert showcase is None
def generate_dataset_and_showcase(acled_url, hxlproxy_url, downloader, countrydata):
    """Create HXLated URLs to the ACLED API, eg.
    https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&url=https%3A//api.acleddata.com/acled/read.csv%3Flimit%3D0%26iso%3D120&tagger-match-all=on&tagger-02-header=iso&tagger-02-tag=%23country%2Bcode&tagger-03-header=event_id_cnty&tagger-03-tag=%23event%2Bcode&tagger-05-header=event_date&tagger-05-tag=%23date%2Boccurred+&tagger-08-header=event_type&tagger-08-tag=%23event%2Btype&tagger-09-header=actor1&tagger-09-tag=%23group%2Bname%2Bfirst&tagger-10-header=assoc_actor_1&tagger-10-tag=%23group%2Bname%2Bfirst%2Bassoc&tagger-12-header=actor2&tagger-12-tag=%23group%2Bname%2Bsecond&tagger-13-header=assoc_actor_2&tagger-13-tag=%23group%2Bname%2Bsecond%2Bassoc&tagger-16-header=region&tagger-16-tag=%23region%2Bname&tagger-17-header=country&tagger-17-tag=%23country%2Bname&tagger-18-header=admin1&tagger-18-tag=%23adm1%2Bname&tagger-19-header=admin2&tagger-19-tag=%23adm2%2Bname&tagger-20-header=admin3&tagger-20-tag=%23adm3%2Bname&tagger-21-header=location&tagger-21-tag=%23loc%2Bname&tagger-22-header=latitude&tagger-22-tag=%23geo%2Blat&tagger-23-header=longitude&tagger-23-tag=%23geo%2Blon&tagger-25-header=source&tagger-25-tag=%23meta%2Bsource&tagger-27-header=notes&tagger-27-tag=%23description&tagger-28-header=fatalities&tagger-28-tag=%23affected%2Bkilled&header-row=1
    """
    countryname = countrydata['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = countrydata['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Live')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['HXL', 'conflicts', 'political violence', 'protests']
    dataset.add_tags(tags)

    acled_country_url = '%siso=%d' % (acled_url, countrydata['m49'])
    url = '%surl=%s%s' % (hxlproxy_url, quote_plus(acled_country_url), hxlate)
    earliest_year = 10000
    latest_year = 0
    for row in downloader.get_tabular_rows(acled_country_url, dict_rows=True, headers=1):
        year = int(row['year'])
        if year < earliest_year:
            earliest_year = year
        if year > latest_year:
            latest_year = year
    if latest_year == 0:
        logger.warning('%s has no data!' % countryname)
        return None, None

    resource = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags',
        'format': 'csv',
        'url': url
    }
    dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % countrydata['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % countrydata['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % countrydata['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
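# A minimal usage sketch for the generator above (assumptions: Configuration.create
# has already been run, the module-level `hxlate` tagger suffix is defined, and the
# URLs and country dict below are hypothetical stand-ins, not the scraper's real config).
from hdx.utilities.downloader import Download

def run_acled_country():
    acled_url = 'https://api.acleddata.com/acled/read.csv?limit=0&'  # hypothetical
    hxlproxy_url = 'https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&'  # hypothetical
    countrydata = {'countryname': 'Cameroon', 'iso3': 'CMR', 'm49': 120}  # hypothetical
    with Download() as downloader:
        dataset, showcase = generate_dataset_and_showcase(acled_url, hxlproxy_url,
                                                          downloader, countrydata)
        if dataset is not None:
            dataset.create_in_hdx()
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)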
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict[0]
    dict4 = copy.deepcopy(datasetsdict[4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('a2f32edd-bac2-4940-aa58-49e565041055') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'a2f32edd-bac2-4940-aa58-49e565041055'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_dataset({'name': 'TEST1'}) is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset('123')
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def test_delete_from_hdx(self, configuration, post_delete):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    showcase.delete_from_hdx()
    del showcase['id']
    with pytest.raises(HDXError):
        showcase.delete_from_hdx()
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict['results'][0]
    dict4 = copy.deepcopy(datasetsdict['results'][4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('a2f32edd-bac2-4940-aa58-49e565041055') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'a2f32edd-bac2-4940-aa58-49e565041055'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_dataset({'name': 'TEST1'}) is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset('123')
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('TEST1')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict[0]
    dict4 = copy.deepcopy(datasetsdict[4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('lala') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'lala'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def create_dataset_showcase(name, countryname, countryiso2, countryiso3, single_dataset=False):
    slugified_name = slugify(name).lower()
    slugified_name = slugified_name.replace(
        "united-kingdom-of-great-britain-and-northern-ireland", "uk")  # Too long
    slugified_name = slugified_name.replace(
        "demographic-and-socio-economic-indicators", "dsei")  # Too long
    if single_dataset:
        title = '%s - Sustainable development, Education, Demographic and Socioeconomic Indicators' % countryname
    else:
        title = name
    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('18f2d467-dcf8-4b7e-bffa-b3c338ba3a7c')
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso3)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None
    dataset.set_expected_update_frequency('Every year')
    tags = ['indicators', 'sustainable development', 'demographics', 'socioeconomics', 'education']
    dataset.add_tags(tags)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': name,
        'notes': 'Education, literacy and other indicators for %s' % countryname,
        'url': 'http://uis.unesco.org/en/country/%s' % countryiso2,
        'image_url': 'http://www.tellmaps.com/uis/internal/assets/uisheader-en.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
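# A minimal sketch of how create_dataset_showcase might be wired up (assuming
# Configuration.create has been run; the country values below are hypothetical).
def run_unesco_country():
    dataset, showcase = create_dataset_showcase(
        'UNESCO indicators - Kenya', 'Kenya', 'KE', 'KEN', single_dataset=True)
    if dataset is None:  # add_country_location failed for an unrecognised country
        return
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)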
def generate_dataset_and_showcase(countryName, countryISO2):
    title = '%s - Demographic, Health, Education and Transport indicators' % countryName
    logger.info('Creating dataset: %s' % title)
    name = 'unhabitat-%s-indicators' % countryISO2
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    # dataset.set_dataset_date(date, dataset_end_date=)
    dataset.set_dataset_year_range(1950, 2050)
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(True)
    dataset.add_country_location(getCountryISO3Code(countryISO2))
    dataset.add_tags(['EDUCATION', 'POPULATION', 'HEALTH', 'TRANSPORT', 'HXL'])
    if os.path.isfile('data/indicator_data_' + countryISO2 + '.csv'):
        resource = Resource()
        resource['name'] = 'Indicators_data_%s' % countryISO2
        resource['description'] = '%s - Demographic, Health, Education and Transport indicators' % countryName
        resource['format'] = 'csv'
        resource.set_file_to_upload('data/indicator_data_' + countryISO2 + '.csv')
        resource.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource)

    showcase_name = slugify('unhabitat-%s indicators-data' % countryName).lower()
    showcase = Showcase({
        'name': showcase_name,
        'title': 'Explore %s indicators' % countryName,
        'notes': 'Explore %s indicators' % countryName,
        'url': 'http://urbandata.unhabitat.org/data-country/?countries=%s&indicators=total_length_road,rural_population,urban_population_countries,urban_slum_population_countries,population,income_gini_coefficient_countries' % countryISO2,
        'image_url': 'https://centre.humdata.org/wp-content/uploads/2018/09/unhabitat-showcase.png'
    })
    showcase.add_tags(['EDUCATION', 'POPULATION', 'HEALTH', 'TRANSPORT'])
    return dataset, showcase
def test_search_in_hdx(self, configuration, allsearch):
    showcases = Showcase.search_in_hdx('ACLED')
    assert len(showcases) == 10
    showcases = Showcase.search_in_hdx('ACLED', offset=2, limit=6)
    assert len(showcases) == 6
    showcases = Showcase.search_in_hdx(fq='metadata_modified:[2018-01-01T00:00:00.000Z TO NOW]')
    assert len(showcases) == 1
    showcases = Showcase.search_in_hdx('ajyhgr')
    assert len(showcases) == 0
    with pytest.raises(HDXError):
        Showcase.search_in_hdx('"')
    with pytest.raises(HDXError):
        Showcase.search_in_hdx('ACLED', rows=11)
    with pytest.raises(HDXError):
        # Test returned row counts per page mismatch (wrong count of 6 purposely in mocksearch)
        Showcase.search_in_hdx('ACLED', page_size=5)
def generate_dataset_and_showcase(base_url, downloader, folder, country):
    countryname = country['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = country['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Every week')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['hxl', 'violence and conflict', 'protests', 'security incidents']
    dataset.add_tags(tags)

    url = '%siso=%d' % (base_url, country['m49'])
    filename = 'conflict_data_%s.csv' % countryiso
    resourcedata = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags'
    }
    quickcharts = {
        'cutdown': 2,
        'cutdownhashtags': ['#date+year', '#adm1+name', '#affected+killed']
    }
    success, results = dataset.download_and_generate_resource(
        downloader, url, hxltags, folder, filename, resourcedata,
        yearcol='year', quickcharts=quickcharts)
    if success is False:
        logger.warning('%s has no data!' % countryname)
        return None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % country['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % country['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % country['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
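# The function above relies on a module-level `hxltags` mapping of ACLED column
# headers to HXL hashtags. A plausible, purely hypothetical shape, consistent with
# the cutdownhashtags used in the quickcharts dict above:
hxltags = {
    'year': '#date+year',            # hypothetical mapping, for illustration only
    'event_date': '#date+occurred',
    'event_type': '#event+type',
    'admin1': '#adm1+name',
    'location': '#loc+name',
    'fatalities': '#affected+killed',
}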
def generate_country_dataset_and_showcase(downloader, folder, headersdata, countryiso,
                                          countrydata, indicator_datasets, tags):
    indicator_datasets_list = indicator_datasets.values()
    title = extract_list_from_list_of_dict(indicator_datasets_list, 'title')
    countryname = Country.get_country_name_from_iso3(countryiso)
    dataset = get_dataset('%s - %s' % (countryname, title[0]), tags,
                          'IDMC IDP data for %s' % countryname)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None, None
    description = extract_list_from_list_of_dict(indicator_datasets_list, 'notes')
    dataset['notes'] = get_matching_then_nonmatching_text(description, separator='\n\n', ignore='\n')
    methodology = extract_list_from_list_of_dict(indicator_datasets_list, 'methodology_other')
    dataset['methodology_other'] = get_matching_then_nonmatching_text(methodology)
    caveats = extract_list_from_list_of_dict(indicator_datasets_list, 'caveats')
    dataset['caveats'] = get_matching_then_nonmatching_text(caveats)

    years = set()
    bites_disabled = [True, True, True]
    for endpoint in countrydata:
        data = countrydata[endpoint]
        headers, hxltags = headersdata[endpoint]
        rows = [headers, hxltags]
        for row in data:
            newrow = list()
            for hxltag in hxltags:
                newrow.append(row.get(hxltag))
            rows.append(newrow)
            year = row.get('#date+year')
            conflict_stock = row.get('#affected+idps+ind+stock+conflict')
            if conflict_stock:
                bites_disabled[0] = False
            conflict_new = row.get('#affected+idps+ind+newdisp+conflict')
            if conflict_new:
                bites_disabled[1] = False
            disaster_new = row.get('#affected+idps+ind+newdisp+disaster')
            if disaster_new:
                bites_disabled[2] = False
            if year is None:
                continue
            years.add(year)
        name = indicator_datasets[endpoint].get_resources()[0]['description']
        resourcedata = {
            'name': endpoint,
            'description': '%s for %s' % (name, countryname)
        }
        filename = '%s_%s.csv' % (endpoint, countryname)
        dataset.generate_resource_from_rows(folder, filename, rows, resourcedata)
    years = sorted(list(years))
    dataset.set_dataset_year_range(years[0], years[-1])

    url = 'http://www.internal-displacement.org/countries/%s/' % countryname.replace(' ', '-')
    try:
        downloader.setup(url)
    except DownloadError:
        altname = Country.get_country_info_from_iso3(countryiso)['#country+alt+i_en+name+v_unterm']
        url = 'http://www.internal-displacement.org/countries/%s/' % altname
        try:
            downloader.setup(url)
        except DownloadError:
            return dataset, None, bites_disabled
    showcase = Showcase({
        'name': '%s-showcase' % dataset['name'],
        'title': 'IDMC %s Summary Page' % countryname,
        'notes': 'Click the image on the right to go to the IDMC summary page for the %s dataset' % countryname,
        'url': url,
        'image_url': 'http://www.internal-displacement.org/sites/default/files/logo_0.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled
def test_get_all_showcases(self, configuration, allsearch):
    showcases = Showcase.get_all_showcases()
    assert len(showcases) == 20
def generate_dataset_and_showcase(base_url, downloader, countrydata, indicators):
    """http://apps.who.int/gho/athena/api/GHO/WHOSIS_000001.csv?filter=COUNTRY:BWA&profile=verbose"""
    countryname = countrydata['display']
    title = '%s - Health Indicators' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('WHO data for %s' % countryname).lower()
    countryiso = countrydata['label']
    for attr in countrydata['attr']:
        if attr['category'] == 'ISO':
            countryiso = attr['value']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('hdx')
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None
    tags = ['indicators']
    dataset.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    for indicator_code, indicator_name, indicator_url in indicators:
        no_rows = 0
        url = '%sGHO/%s.csv?filter=COUNTRY:%s&profile=verbose' % (base_url, indicator_code, countryiso)
        try:
            for row in downloader.get_tabular_rows(url, dict_rows=True, headers=1):
                no_rows += 1
                year = row['YEAR (CODE)']
                if '-' in year:
                    years = year.split('-')
                else:
                    years = [year]
                for year in years:
                    year = int(year)
                    if year < earliest_year:
                        earliest_year = year
                    if year > latest_year:
                        latest_year = year
        except Exception:
            continue
        if no_rows == 0:
            continue
        resource = {
            'name': indicator_name,
            'description': '[Indicator metadata](%s)' % indicator_url,
            'format': 'csv',
            'url': url
        }
        dataset.add_update_resource(resource)
    if len(dataset.get_resources()) == 0:
        logger.exception('%s has no data!' % countryname)
        return None, None
    dataset.set_dataset_year_range(earliest_year, latest_year)

    isolower = countryiso.lower()
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Indicators for %s' % countryname,
        'notes': 'Health indicators for %s' % countryname,
        'url': 'http://www.who.int/countries/%s/en/' % isolower,
        'image_url': 'http://www.who.int/sysmedia/images/countries/%s.gif' % isolower
    })
    showcase.add_tags(tags)
    return dataset, showcase
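# A minimal usage sketch (assumptions: Configuration.create has been run; the
# base_url comes from the docstring above, while the country dict and the
# (code, name, url) indicator tuples are hypothetical examples of the shapes the
# function unpacks).
from hdx.utilities.downloader import Download

def run_who_country():
    base_url = 'http://apps.who.int/gho/athena/api/'  # per the docstring above
    countrydata = {'display': 'Botswana', 'label': 'BWA', 'attr': []}  # hypothetical
    indicators = [('WHOSIS_000001', 'Life expectancy at birth',
                   'http://apps.who.int/gho/data/node.imr.WHOSIS_000001')]  # hypothetical
    with Download() as downloader:
        dataset, showcase = generate_dataset_and_showcase(base_url, downloader,
                                                          countrydata, indicators)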
def generate_dataset_and_showcase(
    indicatorsetname,
    indicatorsets,
    country,
    countrymapping,
    showcase_base_url,
    filelist_url,
    downloader,
    folder,
):
    countryiso = country["iso3"]
    countryname = country["countryname"]
    indicatorset = indicatorsets[indicatorsetname]
    if indicatorsetname == "Prices":
        indicatorsetdisplayname = indicatorsetname
    else:
        indicatorsetdisplayname = f"{indicatorsetname} Indicators"
    title = f"{countryname} - {indicatorsetdisplayname}"
    name = f"FAOSTAT {indicatorsetdisplayname} for {countryname}"
    slugified_name = slugify(name).lower()
    logger.info(f"Creating dataset: {title}")
    dataset = Dataset({"name": slugified_name, "title": title})
    dataset.set_maintainer("196196be-6037-4488-8b71-d786adf4c081")
    dataset.set_organization("ed727a5b-3e6e-4cd6-b97e-4a71532085e6")
    dataset.set_expected_update_frequency("Every year")
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception(f"{countryname} has a problem! {e}")
        return None, None, None, None
    tags = ["hxl", "indicators"]
    tag = indicatorsetname.lower()
    if " - " in tag:
        tags.extend(tag.split(" - "))
    else:
        tags.append(tag)
    dataset.add_tags(tags)

    def process_date(row):
        countrycode = row.get("Area Code")
        if countrycode is None:
            return None
        result = countrymapping.get(countrycode)
        if result is None:
            return None
        isolookup, _ = result
        if isolookup != countryiso:
            return None
        row["Iso3"] = countryiso
        year = row["Year"]
        month = row.get("Months")
        if month is not None and month != "Annual value":
            startdate, enddate = parse_date_range(f"{month} {year}")
        else:
            if "-" in year:
                yearrange = year.split("-")
                startdate, _ = parse_date_range(yearrange[0])
                _, enddate = parse_date_range(yearrange[1])
                row["Year"] = yearrange[1]
            else:
                startdate, enddate = parse_date_range(year)
        row["StartDate"] = startdate.strftime("%Y-%m-%d")
        row["EndDate"] = enddate.strftime("%Y-%m-%d")
        return {"startdate": startdate, "enddate": enddate}

    bites_disabled = [True, True, True]
    qc_indicators = None
    categories = list()
    for row in indicatorset:
        longname = row["DatasetName"]
        url = row["path"]
        category = longname.split(": ")[1]
        filename = f"{category}_{countryiso}.csv"
        description = f"*{category}:*\n{row['DatasetDescription']}"
        if category[-10:] == "Indicators":
            name = category
        else:
            name = f"{category} data"
        resourcedata = {
            "name": f"{name} for {countryname}",
            "description": description,
        }
        header_insertions = [(0, "EndDate"), (0, "StartDate"), (0, "Iso3")]
        indicators_for_qc = row.get("quickcharts")
        if indicators_for_qc:
            quickcharts = {
                "hashtag": "#indicator+code",
                "values": [x["code"] for x in indicators_for_qc],
                "numeric_hashtag": "#indicator+value+num",
                "cutdown": 2,
                "cutdownhashtags": ["#indicator+code", "#country+code", "#date+year"],
            }
            qc_indicators = indicators_for_qc
        else:
            quickcharts = None
        success, results = dataset.download_and_generate_resource(
            downloader,
            url,
            hxltags,
            folder,
            filename,
            resourcedata,
            header_insertions=header_insertions,
            date_function=process_date,
            quickcharts=quickcharts,
            encoding="WINDOWS-1252",
        )
        if success is False:
            logger.warning(f"{category} for {countryname} has no data!")
            continue
        disabled_bites = results.get("bites_disabled")
        if disabled_bites:
            bites_disabled = disabled_bites
        categories.append(category)
    if dataset.number_of_resources() == 0:
        logger.warning(f"{countryname} has no data!")
        return None, None, None, None
    dataset.quickcharts_resource_last()
    notes = [
        f"{indicatorsetdisplayname} for {countryname}.\n\n",
        f"Contains data from the FAOSTAT [bulk data service]({filelist_url})",
    ]
    if len(categories) == 1:
        notes.append(".")
    else:
        notes.append(f" covering the following categories: {', '.join(categories)}")
    dataset["notes"] = "".join(notes)

    showcase = Showcase({
        "name": f"{slugified_name}-showcase",
        "title": title,
        "notes": f"{indicatorsetname} Data Dashboard for {countryname}",
        "url": f"{showcase_base_url}{countryiso}",
        "image_url": "https://pbs.twimg.com/profile_images/1375385494167691269/Bc49-Yx8_400x400.jpg",
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled, qc_indicators
def generate_dataset_resources_and_showcase(pop_types, today):
    title = 'Energy consumption of refugees and displaced people'
    slugified_name = slugify(title.lower())
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('0c6bf79f-504c-4ba5-9fdf-c8cc893c8b2f')
    dataset.set_dataset_date_from_datetime(today)
    dataset.set_expected_update_frequency('Every month')
    dataset.add_other_location('world')
    tags = ['HXL', 'energy', 'refugees', 'internally displaced persons - idp']
    dataset.add_tags(tags)

    resources = list()
    for pop_type in pop_types:
        resource_data = {
            'name': '%s_consumption.csv' % pop_type.lower().replace(' ', '_'),
            'description': '%s %s' % (pop_type, title.lower()),
            'format': 'csv'
        }
        resources.append(Resource(resource_data))
    resource_data = {
        'name': 'population.csv',
        'description': 'UNHCR displaced population totals',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))
    resource_data = {
        'name': 'keyfigures_disagg.csv',
        'description': 'Disaggregated MEI Key Figures',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))
    resource_data = {
        'name': 'keyfigures.csv',
        'description': 'MEI Key Figures',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Energy services for refugees and displaced people',
        'notes': 'Click the image on the right to go to the energy services model',
        'url': 'http://www.sciencedirect.com/science/article/pii/S2211467X16300396',
        'image_url': 'https://ars.els-cdn.com/content/image/X2211467X.jpg'
    })
    showcase.add_tags(tags)
    return dataset, resources, showcase
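# A minimal sketch of wiring up the returned resources (assumptions: the csv files
# named in each resource have already been written locally by earlier steps, and
# the pop_types list is a hypothetical example).
from datetime import datetime

def run_energy():
    pop_types = ['Refugees', 'IDPs']  # hypothetical
    dataset, resources, showcase = generate_dataset_resources_and_showcase(
        pop_types, datetime.utcnow())
    for resource in resources:
        resource.set_file_to_upload(resource['name'])  # assumes each csv exists locally
    dataset.add_update_resources(resources)
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)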
def generate_dataset_and_showcase(folder, country, countrydata, qc_rows, headers,
                                  resources, fields):
    countryiso = country["iso3"]
    countryname = country["countryname"]
    title_text = "Data on forcibly displaced populations and stateless persons"
    if countryname == "World":
        title = f"{title_text} (Global)"
    else:
        title = f"{countryname} - {title_text}"
    logger.info(f"Creating dataset: {title}")
    slugified_name = slugify(f"UNHCR Population Data for {countryiso}").lower()
    dataset = Dataset({"name": slugified_name, "title": title})
    dataset.set_maintainer("8d70b12b-7247-48d2-b426-dbb4bf82eb7c")
    dataset.set_organization("abf4ca86-8e69-40b1-92f7-71509992be88")
    dataset.set_expected_update_frequency("Every six months")
    dataset.set_subnational(True)
    if countryiso == WORLD:
        dataset.add_other_location("world")
    else:
        # Check for unknown country names
        try:
            dataset.add_country_location(countryiso)
        except HDXError:
            logger.error(f"{countryname} ({countryiso}) not recognised!")
            return None, None, None
    tags = ["hxl", "refugees", "asylum", "population"]
    dataset.add_tags(tags)

    # Filter the quick chart data to only include the relevant data for the current country
    qcRowSubset = SubsetQuickChartData(country, qc_rows)

    def process_dates(row):
        year = int(row["Year"])
        startdate = datetime(year, 1, 1)
        # For mid-year data it should be 30-June...
        # enddate = datetime(year, 12, 31)
        if IS_ASR is False and year == LATEST_YEAR:
            enddate = datetime(year, 6, 30)
        else:
            enddate = datetime(year, 12, 31)
        return {"startdate": startdate, "enddate": enddate}

    earliest_startdate = None
    latest_enddate = None
    for resource_name, resource_rows in countrydata.items():
        resource_id = "_".join(resource_name.split("_")[:-1])
        originating_residing = resource_name.split("_")[-1]  # originating or residing
        record = resources[resource_id]
        if countryiso == WORLD:
            # refugees and asylum applicants contain the same data for WORLD
            if originating_residing == "originating":
                continue
        format_parameters = dict(countryiso=countryiso.lower(), countryname=countryname)
        filename = f"{resource_name}_{countryiso}.csv"
        resourcedata = {
            "name": record[originating_residing]["title"].format(**format_parameters),
            "description": record[originating_residing]["description"].format(**format_parameters),
        }
        resourcedata["name"] = resourcedata["name"].replace("residing in World", "(Global)")
        rowit = RowIterator(headers[resource_name], resource_rows).with_fields(fields)
        success, results = dataset.generate_resource_from_iterator(
            rowit.headers(),
            rowit,
            rowit.hxltags_mapping(),
            folder,
            filename,
            resourcedata,
            date_function=process_dates,
            encoding="utf-8",
        )
        if success is False:
            logger.warning(f"{countryname} - {resource_name} has no data!")
        else:
            startdate = results["startdate"]
            if earliest_startdate is None or startdate < earliest_startdate:
                earliest_startdate = startdate
            enddate = results["enddate"]
            if latest_enddate is None or enddate > latest_enddate:
                latest_enddate = enddate
    if len(dataset.get_resources()) == 0:
        logger.error(f"{countryname} has no data!")
        return None, None, None
    dataset.set_date_of_dataset(earliest_startdate, latest_enddate)

    bites_disabled = [True, True, True]
    if countryiso != WORLD:
        filename = "qc_data.csv"
        resourcedata = {
            "name": filename,
            "description": f"QuickCharts data for {countryname}",
        }
        rowit = (ListIterator(
            data=list(qcRowSubset.values()),
            headers=[
                "Year",
                "ISO3CoO",
                "CoO_name",
                "ISO3CoA",
                "CoA_name",
                "Displaced From",
                "Displaced Stateless Within",
                "Displaced Stateless From",
            ],
        ).auto_headers().to_list_iterator())
        years = sorted(set(rowit.column("Year")))[-10:]  # Last 10 years
        headers = rowit.headers()
        rowit = (
            rowit.select(lambda row, years=years: row.get("Year") in years)  # Restrict data to only last 10 years
            .with_sum_field(
                "Displaced From",
                "#affected+displaced+outgoing",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "VDA")) and x.endswith("_outgoing")],
            ).with_sum_field(
                "Displaced Stateless Within",
                "#affected+displaced+stateless+incoming",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "IDP", "VDA", "STA")) and x.endswith("_incoming")],
            ).with_sum_field(
                "Displaced Stateless From",
                "#affected+displaced+stateless+outgoing",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "IDP", "VDA", "STA")) and x.endswith("_outgoing")],
            ).with_fields(fields))
        for row in rowit:
            if (row["Country of Origin Code"] == countryiso
                    and row["Displaced From"] > 0):
                bites_disabled[0] = False
            if row["Year"] != years[-1]:
                continue
            if (row["Country of Asylum Code"] == countryiso
                    and row["Displaced Stateless Within"] > 0):
                bites_disabled[1] = False
            if (row["Country of Origin Code"] == countryiso
                    and row["Displaced Stateless From"] > 0):
                bites_disabled[2] = False
        rowit.reset()
        success, results = dataset.generate_resource_from_iterator(
            rowit.headers(),
            rowit,
            rowit.hxltags_mapping(),
            folder,
            filename,
            resourcedata,
            date_function=process_dates,
            encoding="utf-8",
        )
        if success is False:
            logger.warning(f"QuickCharts {countryname} - {filename} has no data!")

    showcase = Showcase({
        "name": f"{slugified_name}-showcase",
        "title": title,
        "notes": f"UNHCR Population Data Dashboard for {countryname}",
        "url": "https://www.unhcr.org/refugee-statistics/",
        "image_url": "https://www.unhcr.org/assets/img/unhcr-logo.png",
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled
def test_tags(self, configuration):
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    assert showcase.get_tags() == ['economy', 'health']
    showcase.add_tag('wash')
    assert showcase.get_tags() == ['economy', 'health', 'wash']
    showcase.add_tags(['sanitation'])
    assert showcase.get_tags() == ['economy', 'health', 'wash', 'sanitation']
    result = showcase.remove_tag('wash')
    assert result is True
    assert showcase.get_tags() == ['economy', 'health', 'sanitation']
    showcase['tags'] = None
    result = showcase.remove_tag('wash')
    assert result is False
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('TEST1')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
def generate_datasets_and_showcases(downloader, folder, indicatorname,
                                    indicatortypedata, countriesdata, showcase_base_url):
    dataset_template = Dataset()
    dataset_template.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset_template.set_organization('ed727a5b-3e6e-4cd6-b97e-4a71532085e6')
    dataset_template.set_expected_update_frequency('Every year')
    dataset_template.set_subnational(False)
    tags = ['hxl', indicatorname.lower()]
    dataset_template.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    countrycode = None
    iso3 = None
    countryname = None
    rows = None
    datasets = list()
    showcases = list()

    def output_csv():
        if rows is None:
            return
        headers = deepcopy(downloader.response.headers)
        for i, header in enumerate(headers):
            if 'year' in header.lower():
                headers.insert(i, 'EndYear')
                headers.insert(i, 'StartYear')
                break
        headers.insert(0, 'Iso3')
        hxlrow = dict()
        for header in headers:
            hxlrow[header] = hxltags.get(header, '')
        rows.insert(0, hxlrow)
        filepath = join(folder, '%s_%s.csv' % (indicatorname, countrycode))
        write_list_to_csv(rows, filepath, headers=headers)
        ds = datasets[-1]
        ds.set_dataset_year_range(earliest_year, latest_year)
        ds.resources[0].set_file_to_upload(filepath)

    for row in downloader.get_tabular_rows(indicatortypedata['FileLocation'],
                                           dict_rows=True, headers=1, format='csv',
                                           encoding='WINDOWS-1252'):
        newcountry = row['Area Code']
        if newcountry != countrycode:
            output_csv()
            rows = None
            countrycode = newcountry
            result = countriesdata.get(countrycode)
            if result is None:
                logger.warning('Ignoring %s' % countrycode)
                continue
            iso3, cn = result
            countryname = Country.get_country_name_from_iso3(iso3)
            if countryname is None:
                logger.error('Missing country %s: %s, %s' % (countrycode, cn, iso3))
                continue
            rows = list()
            title = '%s - %s Indicators' % (countryname, indicatorname)
            logger.info('Generating dataset: %s' % title)
            name = 'FAOSTAT %s indicators for %s' % (countryname, indicatorname)
            slugified_name = slugify(name).lower()
            dataset = Dataset(deepcopy(dataset_template.data))
            dataset['name'] = slugified_name
            dataset['title'] = title
            dataset.update_from_yaml()
            dataset.add_country_location(countryname)
            earliest_year = 10000
            latest_year = 0
            resource = Resource({'name': title, 'description': ''})
            resource.set_file_type('csv')
            dataset.add_update_resource(resource)
            datasets.append(dataset)
            showcase = Showcase({
                'name': '%s-showcase' % slugified_name,
                'title': title,
                'notes': dataset['notes'],
                'url': '%s%s' % (showcase_base_url, countrycode),
                'image_url': 'http://www.fao.org/uploads/pics/food-agriculture.png'
            })
            showcase.add_tags(tags)
            showcases.append(showcase)
        row['Iso3'] = iso3
        row['Area'] = countryname
        year = row['Year']
        if '-' in year:
            years = year.split('-')
            row['StartYear'] = years[0]
            row['EndYear'] = years[1]
        else:
            years = [year]
            row['StartYear'] = year
            row['EndYear'] = year
        for year in years:
            year = int(year)
            if year < earliest_year:
                earliest_year = year
            if year > latest_year:
                latest_year = year
        if rows is not None:
            rows.append(row)
    output_csv()
    return datasets, showcases
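# A minimal sketch for pairing the returned datasets and showcases (they are
# parallel lists, since the generator above appends to both together; assumes
# Configuration.create has been run).
def run_faostat(downloader, folder, indicatorname, indicatortypedata,
                countriesdata, showcase_base_url):
    datasets, showcases = generate_datasets_and_showcases(
        downloader, folder, indicatorname, indicatortypedata,
        countriesdata, showcase_base_url)
    for dataset, showcase in zip(datasets, showcases):
        dataset.create_in_hdx()
        showcase.create_in_hdx()
        showcase.add_dataset(dataset)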
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
def test_create_in_hdx(self, configuration, post_create):
    showcase = Showcase()
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase['id'] = '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase_data['title'] = 'MyShowcase2'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data['title'] = 'MyShowcase3'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
def generate_joint_dataset_and_showcase(wfpfood_url, downloader, folder, countriesdata):
    """Generate a single joint dataset and showcase containing data for all countries."""
    title = 'Global Food Prices Database (WFP)'
    logger.info('Creating joint dataset: %s' % title)
    slugified_name = 'wfp-food-prices'

    df = joint_dataframe(wfpfood_url, downloader, countriesdata)
    if len(df) <= 1:
        logger.warning('Dataset "%s" is empty' % title)
        return None, None

    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer("9957c0e9-cd38-40f1-900b-22c91276154b")  # Orest Dubay
    # dataset.set_maintainer("154de241-38d6-47d3-a77f-0a9848a61df3")
    dataset.set_organization("3ecac442-7fed-448d-8f78-b385ef6f84e7")
    maxmonth = (100 * df.mp_year + df.mp_month).max() % 100
    dataset.set_dataset_date("%04d-01-01" % df.mp_year.min(),
                             "%04d-%02d-15" % (df.mp_year.max(), maxmonth),
                             "%Y-%m-%d")
    dataset.set_expected_update_frequency("weekly")
    dataset.add_country_locations(sorted(df.adm0_name.unique()))
    dataset.add_tags(tags)

    file_csv = join(folder, "WFPVAM_FoodPrices.csv")
    df.to_csv(file_csv, index=False)
    resource = Resource({
        'name': title,
        'description': "World Food Programme – Food Prices Data Source: WFP Vulnerability Analysis and Mapping (VAM)."
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Global Food Prices',
        'notes': "Interactive data visualisation of WFP's Food Market Prices dataset",
        'url': "https://data.humdata.org/organization/wfp#interactive-data",
        'image_url': "https://docs.humdata.org/wp-content/uploads/wfp_food_prices_data_viz.gif"
    })
    showcase.add_tags(tags)

    dataset.update_from_yaml()
    dataset['notes'] = dataset['notes'] % 'Global Food Prices data from the World Food Programme covering'
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)
    dataset.get_resource().create_datastore_from_yaml_schema(
        yaml_path="wfp_food_prices.yml", path=file_csv)
    logger.info('Finished joint dataset')

    return dataset, showcase
def generate_dataset_and_showcase(folder, country, countrydata, headers):
    countryiso = country['iso3']
    countryname = country['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('UCDP Data for %s' % countryname).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('hdx')
    dataset.set_expected_update_frequency('As needed')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['hxl', 'violence and conflict', 'protests', 'security incidents']
    dataset.add_tags(tags)

    filename = 'conflict_data_%s.csv' % countryiso
    resourcedata = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags'
    }

    def process_year(years, row):
        start_year = int(row['date_start'][:4])
        end_year = int(row['date_end'][:4])
        years.add(start_year)
        years.add(end_year)
        row['start_year'] = start_year
        row['end_year'] = end_year

    quickcharts = {
        'cutdown': 2,
        'cutdownhashtags': ['#date+year+end', '#adm1+name', '#affected+killed']
    }
    success, results = dataset.generate_resource_from_download(
        headers, countrydata, hxltags, folder, filename, resourcedata,
        year_function=process_year, quickcharts=quickcharts)
    if success is False:
        logger.warning('%s has no data!' % countryname)
        return None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': title,
        'notes': 'Conflict Data Dashboard for %s' % countryname,
        'url': 'https://ucdp.uu.se/#country/%s' % countrydata[0]['country_id'],
        'image_url': 'https://pbs.twimg.com/profile_images/832251660718178304/y-LWa5iK_200x200.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase
def generate_dataset_and_showcase(wfpfood_url, downloader, folder, countrydata, shortcuts):
    """Generate datasets and showcases for each country."""
    title = '%s - Food Prices' % countrydata['name']
    logger.info('Creating dataset: %s' % title)
    # Example name which should be unique, so can include organisation name and country
    name = 'WFP food prices for %s' % countrydata['name']
    slugified_name = slugify(name).lower()

    df = read_dataframe(wfpfood_url, downloader, countrydata)
    if len(df) <= 1:
        logger.warning('Dataset "%s" is empty' % title)
        return None, None

    dataset = Dataset({
        'name': slugified_name,
        'title': title,
        "dataset_preview": "resource_id"
    })
    dataset.set_maintainer("9957c0e9-cd38-40f1-900b-22c91276154b")  # Orest Dubay
    # dataset.set_maintainer("154de241-38d6-47d3-a77f-0a9848a61df3")
    dataset.set_organization("3ecac442-7fed-448d-8f78-b385ef6f84e7")
    dataset.set_dataset_date(df.loc[1:].date.min(), df.loc[1:].date.max(), "%Y-%m-%d")
    dataset.set_expected_update_frequency("weekly")
    dataset.add_country_location(countrydata["name"])
    dataset.set_subnational(True)
    dataset.add_tags(tags)
    dataset.add_tag('hxl')

    file_csv = join(folder, "WFP_food_prices_%s.csv" % countrydata["name"].replace(" ", "-"))
    df.to_csv(file_csv, index=False)
    resource = Resource({
        'name': title,
        "dataset_preview_enabled": "False",
        'description': "Food prices data with HXL tags"
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    df1 = quickchart_dataframe(df, shortcuts)
    file_csv = join(folder, "WFP_food_median_prices_%s.csv" % countrydata["name"].replace(" ", "-"))
    df1.to_csv(file_csv, index=False)
    resource = Resource({
        'name': '%s - Food Median Prices' % countrydata['name'],
        "dataset_preview_enabled": "True",
        'description': """Food median prices data with HXL tags.

The median of all prices for a given commodity observed on different markets is shown, together with the market where it was observed.

Data are shortened in multiple ways:

- Rather than prices on all markets, only the median price across all markets is shown, together with the market where it has been observed.
- Only food commodities are displayed (non-food commodities like fuel and wages are not shown).
- Only data after %s are shown. Missing data are interpolated.
- A column with shorter commodity names "cmnshort" is available to be used as chart labels.
- Units are adapted and prices are rescaled in order to yield comparable values (so that they can be displayed and compared in a single chart). The scaling factor is present in the scaling column. A label with the full commodity name and unit (with scale if applicable) is in the column "label".

This reduces the amount of data and allows cleaner charts.
""" % (df1.loc[1:].date.min())
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': title + " showcase",
        'notes': countrydata["name"] + " food prices data from World Food Programme displayed through VAM Economic Explorer",
        'url': "http://dataviz.vam.wfp.org/economic_explorer/prices?adm0=" + countrydata["code"],
        'image_url': "http://dataviz.vam.wfp.org/_images/home/economic_2-4.jpg"
    })
    showcase.add_tags(tags)
    return dataset, showcase
def test_create_in_hdx(self, configuration, post_create):
    showcase = Showcase()
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase['id'] = 'TEST1'
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase_data['title'] = 'MyShowcase2'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data['title'] = 'MyShowcase3'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
def make_hdx_entries(start_date, **params):
    logger.info('Adding any datasets created or updated after %s' % start_date.date().isoformat())
    # Connect to the database
    connection = pymysql.connect(**params)
    try:
        with connection.cursor() as cursor:
            # Read all countries
            sql = "SELECT * FROM `area`"
            cursor.execute(sql)
            unosatCountryCodes = dict()
            for unosatCountryCode in cursor:
                unosatCountryCodes[unosatCountryCode['id_area']] = unosatCountryCode['area_iso3']
            # Read multiple records
            sql = "SELECT * FROM `product` WHERE NOT (GDB_Link LIKE '' AND SHP_Link LIKE '') AND (product_archived IS FALSE) AND (product_created>%s or updated>%s)"
            cursor.execute(sql, (start_date, start_date))
            if not cursor.rowcount:
                raise UNOSATError('No db results found')
            batch = get_uuid()
            for unosatDBEntry in cursor:
                if not unosatDBEntry:
                    raise UNOSATError('Empty row in db!')
                productID = str(unosatDBEntry['id_product'])
                logger.info('Processing UNOSAT product %s' % productID)
                logger.debug(unosatDBEntry)
                id_area = unosatDBEntry['id_area']
                iso3 = unosatCountryCodes[id_area]
                product_glide = unosatDBEntry['product_glide']
                # logger.info('product_glide = %s' % product_glide)
                typetag = product_glide[:2]
                product_description = unosatDBEntry['product_description']
                if '-' in product_glide:
                    glideiso3 = product_glide.split('-')[3]
                    product_description = '**Glide code: %s** %s' % (product_glide, product_description)
                else:
                    glideiso3 = product_glide[10:13]
                    product_description = '**UNOSAT code: %s** %s' % (product_glide, product_description)
                if iso3 != glideiso3:
                    raise UNOSATError('UNOSAT id_area=%s, area_iso3=%s does not match glide iso3=%s'
                                      % (id_area, iso3, glideiso3))
                # Dataset variables
                title = unosatDBEntry['product_title']
                slugified_name = slugify(title)
                if len(slugified_name) > 90:
                    slugified_name = slugified_name.replace('satellite-detected-', '')
                    slugified_name = slugified_name.replace('estimation-of-', '')
                    slugified_name = slugified_name.replace('geodata-of-', '')[:90]
                event_type = standardEventTypesDict[typetag]
                tags = ['geodata']
                if event_type:
                    tags.append(event_type)
                dataset = Dataset({
                    'name': slugified_name,
                    'title': title,
                    'notes': product_description
                })
                dataset.set_maintainer('83fa9515-3ba4-4f1d-9860-f38b20f80442')
                dataset.add_country_location(iso3)
                dataset.add_tags(tags)
                dataset.set_expected_update_frequency('Never')
                dataset.set_dataset_date_from_datetime(unosatDBEntry['product_created'])
                gdb_link = unosatDBEntry['GDB_Link']
                bitsgdb = gdb_link.split('/')
                shp_link = unosatDBEntry['SHP_Link']
                bitsshp = shp_link.split('/')
                resources = [{
                    'name': bitsgdb[-1],
                    'format': 'zipped geodatabase',
                    'url': gdb_link,
                    'description': 'Zipped geodatabase',
                }, {
                    'name': bitsshp[-1],
                    'format': 'zipped shapefile',
                    'url': shp_link,
                    'description': 'Zipped shapefile',
                }]
                dataset.add_update_resources(resources)
                dataset.update_from_yaml()
                showcase = Showcase({
                    'name': '%s-showcase' % slugified_name,
                    'title': 'Static PDF Map',
                    'notes': 'Static viewing map for printing.',
                    'url': 'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s'
                           % (unosatDBEntry['product_folder'], unosatDBEntry['product_url1']),
                    'image_url': 'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s'
                                 % (unosatDBEntry['product_folder'], unosatDBEntry['product_img'])
                })
                showcase.add_tags(tags)
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='UNOSAT',
                                      batch=batch)
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
                with open('publishlog.txt', 'a+') as f:
                    f.write('%s,%s\n' % (productID, dataset.get_hdx_url()))
    finally:
        connection.close()
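# A minimal sketch of calling make_hdx_entries. The rows above are indexed by
# column name (row['id_area'] etc.), so the connection must use pymysql's
# DictCursor; the credentials below are hypothetical placeholders.
import pymysql
from datetime import datetime

def run_unosat():
    params = {
        'host': 'localhost',      # hypothetical
        'user': 'unosat',         # hypothetical
        'password': 'changeme',   # hypothetical
        'db': 'unosat',           # hypothetical
        'cursorclass': pymysql.cursors.DictCursor,  # required for row['...'] access
    }
    make_hdx_entries(datetime(2019, 1, 1), **params)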
def test_delete_from_hdx(self, configuration, post_delete):
    showcase = Showcase.read_from_hdx('TEST1')
    showcase.delete_from_hdx()
    del showcase['id']
    with pytest.raises(HDXError):
        showcase.delete_from_hdx()
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    assert showcase['state'] == 'active'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    expected['tags'] = [{
        'name': 'economics',
        'vocabulary_id': '4381925f-0ae9-44a3-b30d-cae35598757b'
    }, {
        'name': 'health',
        'vocabulary_id': '4381925f-0ae9-44a3-b30d-cae35598757b'
    }]
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
    assert showcase['state'] == 'active'
def generate_dataset_and_showcase(self, countryiso3, folder):
    countryname = Country.get_country_name_from_iso3(countryiso3)
    title = f'{countryname} - Food Prices'
    logger.info(f'Creating dataset: {title}')
    name = f'WFP food prices for {countryname}'
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('f1921552-8c3e-47e9-9804-579b14a83ee3')
    dataset.set_organization('3ecac442-7fed-448d-8f78-b385ef6f84e7')
    dataset.set_expected_update_frequency('weekly')
    dataset.add_country_location(countryname)
    dataset.set_subnational(True)
    tags = ['commodities', 'prices', 'markets', 'hxl']
    dataset.add_tags(tags)

    prices_data = self.get_list('MarketPrices/PriceMonthly', countryiso3)
    if not prices_data:
        logger.info(f'{countryiso3} has no prices data!')
        return None, None, None
    market_to_adm = dict()
    for market in self.get_list('Markets/List', countryiso3):
        market_to_adm[market['marketId']] = (market['admin1Name'], market['admin2Name'],
                                             market['marketLatitude'], market['marketLongitude'])

    rows = dict()
    sources = dict()
    markets = dict()
    for price_data in prices_data:
        if price_data['commodityPriceFlag'] not in ('actual', 'aggregate'):
            continue
        date = price_data['commodityPriceDate']
        category = self.commodity_to_category[price_data['commodityID']]
        market = price_data['marketName']
        if market == 'National Average':
            adm1 = adm2 = lat = lon = ''
        else:
            market_id = price_data['marketID']
            if market_id in market_to_adm:
                adm1, adm2, lat, lon = market_to_adm[market_id]
            else:
                adm1 = adm2 = lat = lon = ''
        orig_source = price_data['commodityPriceSourceName'].replace('M/o', 'Ministry of').replace('+', '/')
        regex = r'Government.*,(Ministry.*)'
        match = re.search(regex, orig_source)
        if match:
            split_sources = [match.group(1)]
        else:
            split_sources = orig_source.replace(',', '/').replace(';', '/').split('/')
        for source in split_sources:
            source = source.strip()
            if not source:
                continue
            if source[-1] == '.':
                source = source[:-1]
            source_lower = source.lower()
            if 'mvam' in source_lower and len(source_lower) <= 8:
                source = 'WFP mVAM'
            elif '?stica' in source:
                source = source.replace('?stica', 'ística')
            source_lower = source.lower()
            if not self.match_source(sources.keys(), source_lower):
                sources[source_lower] = source
        commodity = price_data['commodityName']
        unit = price_data['commodityUnitName']
        price = price_data['commodityPrice']
        currency = price_data['currencyName']
        pricetype = price_data['commodityPriceFlag']
        key = date, adm1, adm2, market, category, commodity, unit
        rows[key] = {
            'date': date,
            'adm1name': adm1,
            'adm2name': adm2,
            'market': market,
            'latitude': lat,
            'longitude': lon,
            'category': category,
            'commodity': commodity,
            'unit': unit,
            'currency': currency,
            'pricetype': pricetype,
            'price': price
        }
        if adm1 and adm2 and category:
            adm1adm2market = adm1, adm2, market
            commodities = markets.get(adm1adm2market, dict())
            dict_of_lists_add(commodities, (commodity, unit, currency), (date, price))
            markets[adm1adm2market] = commodities
    if not rows:
        logger.info(f'{countryiso3} has no prices!')
        return None, None, None

    number_market = list()
    for key, commodities in markets.items():
        number_market.append((len(commodities), key))
    number_market = sorted(number_market, reverse=True)
    qc_indicators = list()
    qc_rows = [qc_hxltags]
    chosen_commodities = set()
    # Go through markets starting with the one with most commodities
    for _, adm1adm2market in number_market:
        commodities = markets[adm1adm2market]
        number_commodity = list()
        for commodityunitcurrency, details in commodities.items():
            number_commodity.append((len(details), commodityunitcurrency))
        number_commodity = sorted(number_commodity, reverse=True)
        index = 0
        # Pick the commodity with most rows that has not already been used for another market
        commodity, unit, currency = number_commodity[index][1]
        while commodity in chosen_commodities:
            index += 1
            if index == len(number_commodity):
                commodity, unit, currency = number_commodity[0][1]
                break
            commodity, unit, currency = number_commodity[index][1]
        adm1, adm2, market = adm1adm2market
        code = f'{adm1}-{adm2}-{market}-{commodity}-{unit}-{currency}'
        for date, price in sorted(commodities[(commodity, unit, currency)]):
            qc_rows.append({'date': date, 'code': code, 'price': price})
        chosen_commodities.add(commodity)
        marketname = market
        if adm2 != market:
            marketname = f'{adm2}/{marketname}'
        if adm1 != adm2:
            marketname = f'{adm1}/{marketname}'
        qc_indicators.append({
            'code': code,
            'title': f'Price of {commodity} in {market}',
            'unit': f'Currency {currency}',
            'description': f'Price of {commodity} ({currency}/{unit}) in {marketname}',
            'code_col': '#meta+code',
            'value_col': '#value',
            'date_col': '#date'
        })
        if len(qc_indicators) == 3:
            break
    dataset['dataset_source'] = ', '.join(sorted(sources.values()))

    filename = f'wfp_food_prices_{countryiso3.lower()}.csv'
    resourcedata = {
        'name': title,
        'description': 'Food prices data with HXL tags',
        'format': 'csv'
    }
    rows = [rows[key] for key in sorted(rows)]
    dataset.generate_resource_from_iterator(headers, rows, hxltags, folder, filename,
                                            resourcedata, datecol='date')
    filename = f'wfp_food_prices_{countryiso3.lower()}_qc.csv'
    resourcedata = {
        'name': f'QuickCharts: {title}',
        'description': 'Food prices QuickCharts data with HXL tags',
        'format': 'csv'
    }
    dataset.generate_resource_from_rows(folder, filename, qc_rows, resourcedata,
                                        headers=list(qc_hxltags.keys()))

    showcase = Showcase({
        'name': f'{slugified_name}-showcase',
        'title': f'{title} showcase',
        'notes': f'{countryname} food prices data from World Food Programme displayed through VAM Economic Explorer',
        'url': f'http://dataviz.vam.wfp.org/economic_explorer/prices?iso3={countryiso3}',
        'image_url': 'http://dataviz.vam.wfp.org/_images/home/3_economic.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase, qc_indicators
def generate_dataset_and_showcases(downloader, countryiso, indicator_metadata, countryalias):
    """Parse json of the form:
    {'id': '1482',
     'title': 'The spatial distribution of population in 2000, Zimbabwe',
     'desc': 'Estimated total number of people per grid-cell...',
     'doi': '10.5258/SOTON/WP00645', 'date': '2018-11-01', 'popyear': '2000',
     'citation': 'WorldPop',
     'data_file': 'GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif',
     'archive': 'N', 'public': 'Y',
     'source': 'WorldPop, University of Southampton, UK',
     'data_format': 'Geotiff',
     'author_email': '*****@*****.**', 'author_name': 'WorldPop',
     'maintainer_name': 'WorldPop', 'maintainer_email': '*****@*****.**',
     'project': 'Population', 'category': 'Global per country 2000-2020',
     'gtype': 'Population', 'continent': 'Africa', 'country': 'Zimbabwe',
     'iso3': 'ZWE',
     'files': ['ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif'],
     'url_img': 'https://www.worldpop.org/tabs/gdata/img/1482/zwe_ppp_wpgp_2000_Image.png',
     'organisation': 'WorldPop, University of Southampton, UK, www.worldpop.org',
     'license': 'https://www.worldpop.org/data/licence.txt',
     'url_summary': 'https://www.worldpop.org/geodata/summary?id=1482'}
    """
    allmetadata = dict()
    for subalias in countryalias:
        urls = countryalias[subalias]
        allmetadata_subalias = allmetadata.get(subalias, list())
        for url in urls:
            downloader.download(url)
            json = downloader.get_json()
            data = json["data"]
            if isinstance(data, list):
                allmetadata_subalias.extend(data)
            else:
                allmetadata_subalias.append(data)
        allmetadata[subalias] = allmetadata_subalias
    allmetadatavalues = list(allmetadata.values())
    lastmetadata = allmetadatavalues[0][-1]
    indicator_title = indicator_metadata["title"]
    if countryiso == "World":
        countryname = countryiso
    else:
        countryname = Country.get_country_name_from_iso3(countryiso)
        if not countryname:
            logger.exception(f"ISO3 {countryiso} not recognised!")
            return None, None
    title = f"{countryname} - {indicator_title}"
    slugified_name = slugify(f"WorldPop {indicator_title} for {countryname}").lower()
    logger.info(f"Creating dataset: {title}")
    # The "license" field actually holds a URL - ideally WorldPop would rename it
    licence_url = lastmetadata["license"].lower()
    downloader.download(licence_url)
    licence = downloader.get_text()
    methodologies = list()
    url_imgs = list()
    for allmetadatavalue in allmetadatavalues:
        lastallmetadatavalue = allmetadatavalue[-1]
        methodologies.append(lastallmetadatavalue["desc"])
        url_img = lastallmetadatavalue["url_img"]
        if not url_img:
            # Fall back to the most recent earlier entry that has an image
            for lastallmetadatavalue in reversed(allmetadatavalue[:-1]):
                url_img = lastallmetadatavalue["url_img"]
                if url_img:
                    break
        url_imgs.append(url_img)
    methodology = get_matching_then_nonmatching_text(methodologies)
    dataset = Dataset(
        {
            "name": slugified_name,
            "title": title,
            "notes": f"{indicator_metadata['desc']} \nData for earlier dates is available directly from WorldPop. \n \n{lastmetadata['citation']}",
            "methodology": "Other",
            "methodology_other": methodology,
            "dataset_source": lastmetadata["source"],
            "license_id": "hdx-other",
            "license_other": licence,
            "private": False,
        }
    )
    dataset.set_maintainer("37023db4-a571-4f28-8d1f-15f0353586af")
    dataset.set_organization("3f077dff-1d05-484d-a7c2-4cb620f22689")
    dataset.set_expected_update_frequency("Every year")
    dataset.set_subnational(True)
    try:
        dataset.add_other_location(countryiso)
    except HDXError as e:
        logger.exception(f"{countryname} has a problem! {e}")
        return None, None
    tags = [indicator_metadata["name"].lower(), "geodata"]
    dataset.add_tags(tags)
    earliest_year = 10000
    latest_year = 0
    resources_dict = dict()
    for subalias in allmetadata:
        for metadata in allmetadata[subalias]:
            if metadata["public"].lower() != "y":
                continue
            year = metadata["popyear"]
            if not year:
                year = metadata["date"][:4]
            year = int(year)
            if year > latest_year:
                latest_year = year
            if year < earliest_year:
                earliest_year = year
            for url in sorted(metadata["files"], reverse=True):
                resource_name = url[url.rfind("/") + 1:]
                description = metadata["title"]
                if not re.match(r".*([1-3][0-9]{3})", resource_name):
                    # No year in the filename: add it to the name and description
                    resource_parts = resource_name.split(".")
                    resource_name = f"{resource_parts[0]}_{year}"
                    if len(resource_parts) >= 2:
                        resource_name = f"{resource_name}.{resource_parts[1]}"
                    description = f"{description} in {year}"
                resource = {
                    "name": resource_name,
                    "format": metadata["data_format"],
                    "url": url,
                    "description": description,
                }
                dict_of_lists_add(resources_dict, year, resource)
    if not resources_dict:
        logger.error(f"{title} has no data!")
        return None, None
    # Just get last 5 years of data
    for year in sorted(resources_dict.keys(), reverse=True)[:5]:
        for resource in resources_dict[year]:
            dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)
    showcases = list()
    for i, url_img in enumerate(url_imgs):
        if not url_img:
            continue
        allmetadatavalue = allmetadatavalues[i][-1]
        url_summary = allmetadatavalue["url_summary"]
        if i == 0:
            name = f"{slugified_name}-showcase"
        else:
            name = f"{slugified_name}-{i + 1}-showcase"
        showcase = Showcase(
            {
                "name": name,
                "title": f"WorldPop {countryname} {indicator_title} Summary Page",
                "notes": f"Summary for {allmetadatavalue['category']} - {countryname}",
                "url": url_summary,
                "image_url": url_img,
            }
        )
        showcase.add_tags(tags)
        showcases.append(showcase)
    return dataset, showcases
def generate_indicator_datasets_and_showcase(downloader, folder, indicators, tags):
    datasets = dict()
    countriesdata = dict()
    headersdata = dict()
    for indicator in indicators:
        metadata = downloader.download_tabular_key_value(indicator['spreadsheet'])
        name = metadata['Indicator Name']
        title = name
        dataset = get_dataset(title, tags, 'idmc-%s' % name)
        dataset['notes'] = "%s\n\nContains data from IDMC's [Global Internal Displacement Database](http://www.internal-displacement.org/database/displacement-data)." % metadata['Long definition']
        dataset['methodology_other'] = metadata['Statistical concept and methodology']
        dataset['caveats'] = metadata['Limitations and exceptions']
        dataset.add_other_location('world')
        url = indicator['url']
        name = indicator['name']
        path = downloader.download_file(url, folder, '%s.xlsx' % name)
        data = hxl.data(path, allow_local=True)
        headers = data.headers
        hxltags = data.display_tags
        headersdata[name] = headers, hxltags
        years = set()
        rows = [headers, hxltags]
        for row in data:
            newrow = list()
            for hxltag in hxltags:
                newrow.append(row.get(hxltag))
            rows.append(newrow)
            iso3 = row.get('#country+code')
            epcountrydata = countriesdata.get(iso3, dict())
            dict_of_lists_add(epcountrydata, name, row)
            countriesdata[iso3] = epcountrydata
            year = row.get('#date+year')
            if year is None:
                continue
            years.add(year)
        resourcedata = {'name': name, 'description': title}
        filename = '%s.csv' % name
        dataset.generate_resource_from_rows(folder, filename, rows, resourcedata)
        years = sorted(list(years))
        dataset.set_dataset_year_range(years[0], years[-1])
        datasets[name] = dataset
    title = 'IDMC Global Report on Internal Displacement'
    slugified_name = slugify(title).lower()
    showcase = Showcase({
        'name': slugified_name,
        'title': title,
        'notes': 'Click the image on the right to go to the %s' % title,
        'url': 'http://www.internal-displacement.org/global-report/grid2018/',
        'image_url': 'http://www.internal-displacement.org/global-report/grid2018/img/ogimage.jpg'
    })
    showcase.add_tags(tags)
    return datasets, showcase, headersdata, countriesdata
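# Sketch of the shape countriesdata takes above, using a minimal stand-in for
# hdx.utilities.dictandlist.dict_of_lists_add (assumed behavior: append the
# value to the list at the key, creating the list if needed). The toy rows are
# illustrative only.
def dict_of_lists_add(dictionary, key, value):
    dictionary.setdefault(key, list()).append(value)

countriesdata = dict()
toy_rows = [{'#country+code': 'AFG', '#date+year': 2017},
            {'#country+code': 'AFG', '#date+year': 2018}]
for row in toy_rows:
    iso3 = row['#country+code']
    epcountrydata = countriesdata.get(iso3, dict())
    dict_of_lists_add(epcountrydata, 'displacement_data', row)
    countriesdata[iso3] = epcountrydata
print(countriesdata['AFG']['displacement_data'])  # rows grouped by iso3, then indicator name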
def generate_dataset_and_showcase(folder, countryiso, countrydata, qc_indicators):
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Human Development Indicators' % countryname
    slugified_name = slugify('HDRO data for %s' % countryname).lower()
    logger.info('Creating dataset: %s' % title)
    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer('872427e4-7e9b-44d6-8c58-30d5052a00a2')
    dataset.set_organization('89ebe982-abe9-4748-9dde-cf04632757d6')
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(False)
    dataset.add_country_location(countryiso)
    tags = [
        'health', 'education', 'socioeconomic', 'demographics', 'development',
        'indicators', 'hxl'
    ]
    dataset.add_tags(tags)
    filename = 'hdro_indicators_%s.csv' % countryiso
    resourcedata = {
        'name': 'Human Development Indicators for %s' % countryname,
        'description': 'Human development data with HXL tags'
    }
    quickcharts = {
        'hashtag': '#indicator+code',
        'values': [x['code'] for x in qc_indicators],
        'cutdown': 2,
        'cutdownhashtags': ['#indicator+code', '#date+year', '#indicator+value+num']
    }

    def yearcol_function(row):
        result = dict()
        year = row['year']
        if year:
            if len(year) == 9:
                # Year ranges like '2005-2010'
                startyear = year[:4]
                endyear = year[5:]
                result['startdate'], _ = parse_date_range(startyear, date_format='%Y')
                _, result['enddate'] = parse_date_range(endyear, date_format='%Y')
            else:
                result['startdate'], result['enddate'] = parse_date_range(
                    year, date_format='%Y')
        return result

    # hxltags here is presumably a module-level mapping of header to HXL tag
    success, results = dataset.generate_resource_from_iterator(
        countrydata[0].keys(), countrydata, hxltags, folder, filename,
        resourcedata, date_function=yearcol_function, quickcharts=quickcharts)
    if success is False:
        logger.error('%s has no data!' % countryname)
        return None, None, None
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Indicators for %s' % countryname,
        'notes': 'Human Development indicators for %s' % countryname,
        'url': 'http://hdr.undp.org/en/countries/profiles/%s' % countryiso,
        'image_url': 'https://s1.stabroeknews.com/images/2019/12/undp.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase, results['bites_disabled']
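# Minimal sketch of what yearcol_function does with single years versus
# 9-character ranges like '2005-2010'. datetime stands in for
# hdx.utilities.dates.parse_date_range, which resolves a '%Y' string to the
# start and end of that year; year_range is illustrative only.
from datetime import datetime

def year_range(year):
    if len(year) == 9:  # e.g. '2005-2010'
        start, end = year[:4], year[5:]
    else:               # e.g. '2010'
        start = end = year
    return datetime(int(start), 1, 1), datetime(int(end), 12, 31)

print(year_range('2005-2010'))  # 2005-01-01 .. 2010-12-31
print(year_range('2010'))       # 2010-01-01 .. 2010-12-31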
def generate_datasets_and_showcase(configuration, base_url, downloader, folder, country, dhstags):
    """Generate national and subnational DHS datasets plus a showcase for one country"""
    countryiso = country['iso3']
    dhscountrycode = country['dhscode']
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Demographic and Health Data' % countryname
    logger.info('Creating datasets for %s' % title)
    tags = ['hxl', 'health', 'demographics']
    dataset = get_dataset(countryiso, tags)
    if dataset is None:
        return None, None, None, None
    dataset['title'] = title.replace('Demographic', 'National Demographic')
    slugified_name = slugify('DHS Data for %s' % countryname).lower()
    dataset['name'] = slugified_name
    dataset.set_subnational(False)
    subdataset = get_dataset(countryiso, tags)
    if subdataset is None:
        return None, None, None, None
    subdataset['title'] = title.replace('Demographic', 'Subnational Demographic')
    subslugified_name = slugify('DHS Subnational Data for %s' % countryname).lower()
    subdataset['name'] = subslugified_name
    subdataset.set_subnational(True)
    # description is a notes template defined elsewhere in the scraper; each
    # dataset's notes link to the other dataset
    dataset['notes'] = description % (
        subdataset['title'], configuration.get_dataset_url(subslugified_name))
    subdataset['notes'] = description % (
        dataset['title'], configuration.get_dataset_url(slugified_name))
    bites_disabled = {'national': dict(), 'subnational': dict()}

    def process_national_row(_, row):
        row['ISO3'] = countryiso
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['national'])
        return row

    def process_subnational_row(_, row):
        row['ISO3'] = countryiso
        val = row['CharacteristicLabel']
        if val[:2] == '..':
            val = val[2:]
        row['Location'] = val
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['subnational'])
        return row

    years = set()
    subyears = set()
    for dhstag in dhstags:
        tagname = dhstag['TagName'].strip()
        resource_name = '%s Data for %s' % (tagname, countryname)
        resourcedata = {
            'name': resource_name,
            'description': 'HXLated csv containing %s data' % tagname
        }
        url = '%sdata/%s?tagids=%s&breakdown=national&perpage=10000&f=csv' % (
            base_url, dhscountrycode, dhstag['TagID'])
        filename = '%s_national_%s.csv' % (tagname, countryiso)
        _, results = dataset.download_and_generate_resource(
            downloader, url, hxltags, folder, filename, resourcedata,
            header_insertions=[(0, 'ISO3')], row_function=process_national_row,
            yearcol='SurveyYear')
        years.update(results['years'])
        url = url.replace('breakdown=national', 'breakdown=subnational')
        filename = '%s_subnational_%s.csv' % (tagname, countryiso)
        try:
            insertions = [(0, 'ISO3'), (1, 'Location')]
            _, results = subdataset.download_and_generate_resource(
                downloader, url, hxltags, folder, filename, resourcedata,
                header_insertions=insertions,
                row_function=process_subnational_row, yearcol='SurveyYear')
            subyears.update(results['years'])
        except DownloadError as ex:
            cause = ex.__cause__
            if cause is not None:
                # The DHS API raises this when a tag has no subnational data
                if 'Variable RET is undefined' not in str(cause):
                    raise ex
            else:
                raise ex
    if len(dataset.get_resources()) == 0:
        dataset = None
    else:
        set_dataset_date_bites(dataset, years, bites_disabled, 'national')
    if len(subdataset.get_resources()) == 0:
        subdataset = None
    else:
        set_dataset_date_bites(subdataset, subyears, bites_disabled, 'subnational')
    publication = get_publication(base_url, downloader, dhscountrycode)
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': publication['PublicationTitle'],
        'notes': publication['PublicationDescription'],
        'url': publication['PublicationURL'],
        'image_url': publication['ThumbnailURL']
    })
    showcase.add_tags(tags)
    return dataset, subdataset, showcase, bites_disabled
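# Quick check of the CharacteristicLabel cleanup in process_subnational_row:
# subnational labels arrive prefixed with '..', which is stripped into the
# Location column. Toy rows only - not part of the scraper.
for row in ({'CharacteristicLabel': '..Rural'}, {'CharacteristicLabel': 'Urban'}):
    val = row['CharacteristicLabel']
    if val[:2] == '..':
        val = val[2:]
    row['Location'] = val
    print(row['Location'])  # Rural, then Urban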