def test_read_from_hdx(self, configuration, read):
    showcase = Showcase.read_from_hdx('TEST1')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase = Showcase.read_from_hdx('TEST2')
    assert showcase is None
    showcase = Showcase.read_from_hdx('TEST3')
    assert showcase is None
def test_update_json(self, configuration, static_json):
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    assert showcase['title'] == 'MyShowcase1'
    assert showcase['name'] == 'showcase-1'
    showcase.update_from_json(static_json)
    assert showcase['title'] == 'MyShowcase1'
    assert showcase['name'] == 'new-showcase-1'
def test_read_from_hdx(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase = Showcase.read_from_hdx('TEST2')
    assert showcase is None
    showcase = Showcase.read_from_hdx('TEST3')
    assert showcase is None
def generate_dataset_and_showcase(acled_url, hxlproxy_url, downloader, countrydata):
    """Create HXLated URLs to the ACLED API, eg.
    https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&url=https%3A//api.acleddata.com/acled/read.csv%3Flimit%3D0%26iso%3D120&tagger-match-all=on&tagger-02-header=iso&tagger-02-tag=%23country%2Bcode&tagger-03-header=event_id_cnty&tagger-03-tag=%23event%2Bcode&tagger-05-header=event_date&tagger-05-tag=%23date%2Boccurred+&tagger-08-header=event_type&tagger-08-tag=%23event%2Btype&tagger-09-header=actor1&tagger-09-tag=%23group%2Bname%2Bfirst&tagger-10-header=assoc_actor_1&tagger-10-tag=%23group%2Bname%2Bfirst%2Bassoc&tagger-12-header=actor2&tagger-12-tag=%23group%2Bname%2Bsecond&tagger-13-header=assoc_actor_2&tagger-13-tag=%23group%2Bname%2Bsecond%2Bassoc&tagger-16-header=region&tagger-16-tag=%23region%2Bname&tagger-17-header=country&tagger-17-tag=%23country%2Bname&tagger-18-header=admin1&tagger-18-tag=%23adm1%2Bname&tagger-19-header=admin2&tagger-19-tag=%23adm2%2Bname&tagger-20-header=admin3&tagger-20-tag=%23adm3%2Bname&tagger-21-header=location&tagger-21-tag=%23loc%2Bname&tagger-22-header=latitude&tagger-22-tag=%23geo%2Blat&tagger-23-header=longitude&tagger-23-tag=%23geo%2Blon&tagger-25-header=source&tagger-25-tag=%23meta%2Bsource&tagger-27-header=notes&tagger-27-tag=%23description&tagger-28-header=fatalities&tagger-28-tag=%23affected%2Bkilled&header-row=1
    """
    countryname = countrydata['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = countrydata['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Live')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['HXL', 'conflicts', 'political violence', 'protests']
    dataset.add_tags(tags)

    acled_country_url = '%siso=%d' % (acled_url, countrydata['m49'])
    url = '%surl=%s%s' % (hxlproxy_url, quote_plus(acled_country_url), hxlate)
    earliest_year = 10000
    latest_year = 0
    for row in downloader.get_tabular_rows(acled_country_url, dict_rows=True, headers=1):
        year = int(row['year'])
        if year < earliest_year:
            earliest_year = year
        if year > latest_year:
            latest_year = year
    if latest_year == 0:
        logger.warning('%s has no data!' % countryname)
        return None, None

    resource = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags',
        'format': 'csv',
        'url': url
    }
    dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % countrydata['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % countrydata['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % countrydata['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
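# A minimal usage sketch for the generator above (assumptions: Configuration.create
# has already been run, the module-level `hxlate` tagger suffix is defined, and the
# URLs and country dict below are hypothetical stand-ins, not the scraper's real config).
from hdx.utilities.downloader import Download

def run_acled_country():
    acled_url = 'https://api.acleddata.com/acled/read.csv?limit=0&'  # hypothetical
    hxlproxy_url = 'https://data.humdata.org/hxlproxy/data.csv?name=ACLEDHXL&'  # hypothetical
    countrydata = {'countryname': 'Cameroon', 'iso3': 'CMR', 'm49': 120}  # hypothetical
    with Download() as downloader:
        dataset, showcase = generate_dataset_and_showcase(acled_url, hxlproxy_url,
                                                          downloader, countrydata)
        if dataset is not None:
            dataset.create_in_hdx()
            showcase.create_in_hdx()
            showcase.add_dataset(dataset)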
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict[0]
    dict4 = copy.deepcopy(datasetsdict[4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('a2f32edd-bac2-4940-aa58-49e565041055') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'a2f32edd-bac2-4940-aa58-49e565041055'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_dataset({'name': 'TEST1'}) is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset('123')
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def test_delete_from_hdx(self, configuration, post_delete):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    showcase.delete_from_hdx()
    del showcase['id']
    with pytest.raises(HDXError):
        showcase.delete_from_hdx()
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict['results'][0]
    dict4 = copy.deepcopy(datasetsdict['results'][4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('a2f32edd-bac2-4940-aa58-49e565041055') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'a2f32edd-bac2-4940-aa58-49e565041055'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_dataset({'name': 'TEST1'}) is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset('123')
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def test_datasets(self, configuration, read):
    showcase = Showcase.read_from_hdx('TEST1')
    datasets = showcase.get_datasets()
    assert len(datasets) == 10
    assert datasets[0].data == datasetsdict[0]
    dict4 = copy.deepcopy(datasetsdict[4])
    del dict4['resources']
    assert datasets[4].data == dict4
    TestShowcase.association = None
    showcase.remove_dataset(datasets[0])
    assert TestShowcase.association == 'delete'
    TestShowcase.association = None
    assert showcase.add_dataset('lala') is True
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    assert showcase.add_datasets([{'id': 'lala'},
                                  {'id': '6a5aebc1-f5a9-4842-8183-b8118228e71e'}]) is False
    assert TestShowcase.association == 'create'
    TestShowcase.association = None
    with pytest.raises(HDXError):
        showcase.add_dataset(123)
def create_dataset_showcase(name, countryname, countryiso2, countryiso3, single_dataset=False):
    slugified_name = slugify(name).lower()
    slugified_name = slugified_name.replace(
        "united-kingdom-of-great-britain-and-northern-ireland", "uk")  # Too long
    slugified_name = slugified_name.replace(
        "demographic-and-socio-economic-indicators", "dsei")  # Too long
    if single_dataset:
        title = '%s - Sustainable development, Education, Demographic and Socioeconomic Indicators' % countryname
    else:
        title = name
    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('18f2d467-dcf8-4b7e-bffa-b3c338ba3a7c')
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso3)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None
    dataset.set_expected_update_frequency('Every year')
    tags = ['indicators', 'sustainable development', 'demographics', 'socioeconomics', 'education']
    dataset.add_tags(tags)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': name,
        'notes': 'Education, literacy and other indicators for %s' % countryname,
        'url': 'http://uis.unesco.org/en/country/%s' % countryiso2,
        'image_url': 'http://www.tellmaps.com/uis/internal/assets/uisheader-en.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
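# A minimal sketch of how create_dataset_showcase might be wired up (assuming
# Configuration.create has been run; the country values below are hypothetical).
def run_unesco_country():
    dataset, showcase = create_dataset_showcase(
        'UNESCO indicators - Kenya', 'Kenya', 'KE', 'KEN', single_dataset=True)
    if dataset is None:  # add_country_location failed for an unrecognised country
        return
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)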
def generate_dataset_and_showcase(countryName, countryISO2):
    title = '%s - Demographic, Health, Education and Transport indicators' % countryName
    logger.info('Creating dataset: %s' % title)
    name = 'unhabitat-%s-indicators' % countryISO2
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    # dataset.set_dataset_date(date, dataset_end_date=)
    dataset.set_dataset_year_range(1950, 2050)
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(True)
    dataset.add_country_location(getCountryISO3Code(countryISO2))
    dataset.add_tags(['EDUCATION', 'POPULATION', 'HEALTH', 'TRANSPORT', 'HXL'])
    if os.path.isfile('data/indicator_data_' + countryISO2 + '.csv'):
        resource = Resource()
        resource['name'] = 'Indicators_data_%s' % countryISO2
        resource['description'] = '%s - Demographic, Health, Education and Transport indicators' % countryName
        resource['format'] = 'csv'
        resource.set_file_to_upload('data/indicator_data_' + countryISO2 + '.csv')
        resource.check_required_fields(['group', 'package_id'])
        dataset.add_update_resource(resource)

    showcase_name = slugify('unhabitat-%s indicators-data' % countryName).lower()
    showcase = Showcase({
        'name': showcase_name,
        'title': 'Explore %s indicators' % countryName,
        'notes': 'Explore %s indicators' % countryName,
        'url': 'http://urbandata.unhabitat.org/data-country/?countries=%s&indicators=total_length_road,rural_population,urban_population_countries,urban_slum_population_countries,population,income_gini_coefficient_countries' % countryISO2,
        'image_url': 'https://centre.humdata.org/wp-content/uploads/2018/09/unhabitat-showcase.png'
    })
    showcase.add_tags(['EDUCATION', 'POPULATION', 'HEALTH', 'TRANSPORT'])
    return dataset, showcase
def test_search_in_hdx(self, configuration, allsearch):
    showcases = Showcase.search_in_hdx('ACLED')
    assert len(showcases) == 10
    showcases = Showcase.search_in_hdx('ACLED', offset=2, limit=6)
    assert len(showcases) == 6
    showcases = Showcase.search_in_hdx(fq='metadata_modified:[2018-01-01T00:00:00.000Z TO NOW]')
    assert len(showcases) == 1
    showcases = Showcase.search_in_hdx('ajyhgr')
    assert len(showcases) == 0
    with pytest.raises(HDXError):
        Showcase.search_in_hdx('"')
    with pytest.raises(HDXError):
        Showcase.search_in_hdx('ACLED', rows=11)
    with pytest.raises(HDXError):
        # Test returned row counts per page mismatch (wrong count of 6 purposely in mocksearch)
        Showcase.search_in_hdx('ACLED', page_size=5)
def generate_dataset_and_showcase(base_url, downloader, folder, country):
    countryname = country['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('ACLED Data for %s' % countryname).lower()
    countryiso = country['iso3']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('8b84230c-e04a-43ec-99e5-41307a203a2f')
    dataset.set_organization('b67e6c74-c185-4f43-b561-0e114a736f19')
    dataset.set_expected_update_frequency('Every week')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['hxl', 'violence and conflict', 'protests', 'security incidents']
    dataset.add_tags(tags)

    url = '%siso=%d' % (base_url, country['m49'])
    filename = 'conflict_data_%s.csv' % countryiso
    resourcedata = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags'
    }
    quickcharts = {
        'cutdown': 2,
        'cutdownhashtags': ['#date+year', '#adm1+name', '#affected+killed']
    }
    success, results = dataset.download_and_generate_resource(
        downloader, url, hxltags, folder, filename, resourcedata,
        yearcol='year', quickcharts=quickcharts)
    if success is False:
        logger.warning('%s has no data!' % countryname)
        return None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Dashboard for %s' % country['countryname'],
        'notes': 'Conflict Data Dashboard for %s' % country['countryname'],
        'url': 'https://www.acleddata.com/dashboard/#%03d' % country['m49'],
        'image_url': 'https://www.acleddata.com/wp-content/uploads/2018/01/dash.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase
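# The function above relies on a module-level `hxltags` mapping of ACLED column
# headers to HXL hashtags. A plausible, purely hypothetical shape, consistent with
# the cutdownhashtags used in the quickcharts dict above:
hxltags = {
    'year': '#date+year',            # hypothetical mapping, for illustration only
    'event_date': '#date+occurred',
    'event_type': '#event+type',
    'admin1': '#adm1+name',
    'location': '#loc+name',
    'fatalities': '#affected+killed',
}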
def generate_country_dataset_and_showcase(downloader, folder, headersdata, countryiso,
                                          countrydata, indicator_datasets, tags):
    indicator_datasets_list = indicator_datasets.values()
    title = extract_list_from_list_of_dict(indicator_datasets_list, 'title')
    countryname = Country.get_country_name_from_iso3(countryiso)
    dataset = get_dataset('%s - %s' % (countryname, title[0]), tags,
                          'IDMC IDP data for %s' % countryname)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None, None
    description = extract_list_from_list_of_dict(indicator_datasets_list, 'notes')
    dataset['notes'] = get_matching_then_nonmatching_text(description, separator='\n\n', ignore='\n')
    methodology = extract_list_from_list_of_dict(indicator_datasets_list, 'methodology_other')
    dataset['methodology_other'] = get_matching_then_nonmatching_text(methodology)
    caveats = extract_list_from_list_of_dict(indicator_datasets_list, 'caveats')
    dataset['caveats'] = get_matching_then_nonmatching_text(caveats)

    years = set()
    bites_disabled = [True, True, True]
    for endpoint in countrydata:
        data = countrydata[endpoint]
        headers, hxltags = headersdata[endpoint]
        rows = [headers, hxltags]
        for row in data:
            newrow = list()
            for hxltag in hxltags:
                newrow.append(row.get(hxltag))
            rows.append(newrow)
            year = row.get('#date+year')
            conflict_stock = row.get('#affected+idps+ind+stock+conflict')
            if conflict_stock:
                bites_disabled[0] = False
            conflict_new = row.get('#affected+idps+ind+newdisp+conflict')
            if conflict_new:
                bites_disabled[1] = False
            disaster_new = row.get('#affected+idps+ind+newdisp+disaster')
            if disaster_new:
                bites_disabled[2] = False
            if year is None:
                continue
            years.add(year)
        name = indicator_datasets[endpoint].get_resources()[0]['description']
        resourcedata = {
            'name': endpoint,
            'description': '%s for %s' % (name, countryname)
        }
        filename = '%s_%s.csv' % (endpoint, countryname)
        dataset.generate_resource_from_rows(folder, filename, rows, resourcedata)
    years = sorted(list(years))
    dataset.set_dataset_year_range(years[0], years[-1])

    url = 'http://www.internal-displacement.org/countries/%s/' % countryname.replace(' ', '-')
    try:
        downloader.setup(url)
    except DownloadError:
        altname = Country.get_country_info_from_iso3(countryiso)['#country+alt+i_en+name+v_unterm']
        url = 'http://www.internal-displacement.org/countries/%s/' % altname
        try:
            downloader.setup(url)
        except DownloadError:
            return dataset, None, bites_disabled
    showcase = Showcase({
        'name': '%s-showcase' % dataset['name'],
        'title': 'IDMC %s Summary Page' % countryname,
        'notes': 'Click the image on the right to go to the IDMC summary page for the %s dataset' % countryname,
        'url': url,
        'image_url': 'http://www.internal-displacement.org/sites/default/files/logo_0.png'
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled
def test_get_all_showcases(self, configuration, allsearch):
    showcases = Showcase.get_all_showcases()
    assert len(showcases) == 20
def generate_dataset_and_showcase(base_url, downloader, countrydata, indicators):
    """http://apps.who.int/gho/athena/api/GHO/WHOSIS_000001.csv?filter=COUNTRY:BWA&profile=verbose"""
    countryname = countrydata['display']
    title = '%s - Health Indicators' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('WHO data for %s' % countryname).lower()
    countryiso = countrydata['label']
    for attr in countrydata['attr']:
        if attr['category'] == 'ISO':
            countryiso = attr['value']
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('hdx')
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception('%s has a problem! %s' % (countryname, e))
        return None, None
    tags = ['indicators']
    dataset.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    for indicator_code, indicator_name, indicator_url in indicators:
        no_rows = 0
        url = '%sGHO/%s.csv?filter=COUNTRY:%s&profile=verbose' % (base_url, indicator_code, countryiso)
        try:
            for row in downloader.get_tabular_rows(url, dict_rows=True, headers=1):
                no_rows += 1
                year = row['YEAR (CODE)']
                if '-' in year:
                    years = year.split('-')
                else:
                    years = [year]
                for year in years:
                    year = int(year)
                    if year < earliest_year:
                        earliest_year = year
                    if year > latest_year:
                        latest_year = year
        except Exception:
            continue
        if no_rows == 0:
            continue
        resource = {
            'name': indicator_name,
            'description': '[Indicator metadata](%s)' % indicator_url,
            'format': 'csv',
            'url': url
        }
        dataset.add_update_resource(resource)
    if len(dataset.get_resources()) == 0:
        logger.exception('%s has no data!' % countryname)
        return None, None
    dataset.set_dataset_year_range(earliest_year, latest_year)

    isolower = countryiso.lower()
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Indicators for %s' % countryname,
        'notes': 'Health indicators for %s' % countryname,
        'url': 'http://www.who.int/countries/%s/en/' % isolower,
        'image_url': 'http://www.who.int/sysmedia/images/countries/%s.gif' % isolower
    })
    showcase.add_tags(tags)
    return dataset, showcase
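# A minimal usage sketch (assumptions: Configuration.create has been run; the
# base_url comes from the docstring above, while the country dict and the
# (code, name, url) indicator tuples are hypothetical examples of the shapes the
# function unpacks).
from hdx.utilities.downloader import Download

def run_who_country():
    base_url = 'http://apps.who.int/gho/athena/api/'  # per the docstring above
    countrydata = {'display': 'Botswana', 'label': 'BWA', 'attr': []}  # hypothetical
    indicators = [('WHOSIS_000001', 'Life expectancy at birth',
                   'http://apps.who.int/gho/data/node.imr.WHOSIS_000001')]  # hypothetical
    with Download() as downloader:
        dataset, showcase = generate_dataset_and_showcase(base_url, downloader,
                                                          countrydata, indicators)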
def generate_dataset_and_showcase(
    indicatorsetname,
    indicatorsets,
    country,
    countrymapping,
    showcase_base_url,
    filelist_url,
    downloader,
    folder,
):
    countryiso = country["iso3"]
    countryname = country["countryname"]
    indicatorset = indicatorsets[indicatorsetname]
    if indicatorsetname == "Prices":
        indicatorsetdisplayname = indicatorsetname
    else:
        indicatorsetdisplayname = f"{indicatorsetname} Indicators"
    title = f"{countryname} - {indicatorsetdisplayname}"
    name = f"FAOSTAT {indicatorsetdisplayname} for {countryname}"
    slugified_name = slugify(name).lower()
    logger.info(f"Creating dataset: {title}")
    dataset = Dataset({"name": slugified_name, "title": title})
    dataset.set_maintainer("196196be-6037-4488-8b71-d786adf4c081")
    dataset.set_organization("ed727a5b-3e6e-4cd6-b97e-4a71532085e6")
    dataset.set_expected_update_frequency("Every year")
    dataset.set_subnational(False)
    try:
        dataset.add_country_location(countryiso)
    except HDXError as e:
        logger.exception(f"{countryname} has a problem! {e}")
        return None, None, None, None
    tags = ["hxl", "indicators"]
    tag = indicatorsetname.lower()
    if " - " in tag:
        tags.extend(tag.split(" - "))
    else:
        tags.append(tag)
    dataset.add_tags(tags)

    def process_date(row):
        countrycode = row.get("Area Code")
        if countrycode is None:
            return None
        result = countrymapping.get(countrycode)
        if result is None:
            return None
        isolookup, _ = result
        if isolookup != countryiso:
            return None
        row["Iso3"] = countryiso
        year = row["Year"]
        month = row.get("Months")
        if month is not None and month != "Annual value":
            startdate, enddate = parse_date_range(f"{month} {year}")
        else:
            if "-" in year:
                yearrange = year.split("-")
                startdate, _ = parse_date_range(yearrange[0])
                _, enddate = parse_date_range(yearrange[1])
                row["Year"] = yearrange[1]
            else:
                startdate, enddate = parse_date_range(year)
        row["StartDate"] = startdate.strftime("%Y-%m-%d")
        row["EndDate"] = enddate.strftime("%Y-%m-%d")
        return {"startdate": startdate, "enddate": enddate}

    bites_disabled = [True, True, True]
    qc_indicators = None
    categories = list()
    for row in indicatorset:
        longname = row["DatasetName"]
        url = row["path"]
        category = longname.split(": ")[1]
        filename = f"{category}_{countryiso}.csv"
        description = f"*{category}:*\n{row['DatasetDescription']}"
        if category[-10:] == "Indicators":
            name = category
        else:
            name = f"{category} data"
        resourcedata = {
            "name": f"{name} for {countryname}",
            "description": description,
        }
        header_insertions = [(0, "EndDate"), (0, "StartDate"), (0, "Iso3")]
        indicators_for_qc = row.get("quickcharts")
        if indicators_for_qc:
            quickcharts = {
                "hashtag": "#indicator+code",
                "values": [x["code"] for x in indicators_for_qc],
                "numeric_hashtag": "#indicator+value+num",
                "cutdown": 2,
                "cutdownhashtags": ["#indicator+code", "#country+code", "#date+year"],
            }
            qc_indicators = indicators_for_qc
        else:
            quickcharts = None
        success, results = dataset.download_and_generate_resource(
            downloader,
            url,
            hxltags,
            folder,
            filename,
            resourcedata,
            header_insertions=header_insertions,
            date_function=process_date,
            quickcharts=quickcharts,
            encoding="WINDOWS-1252",
        )
        if success is False:
            logger.warning(f"{category} for {countryname} has no data!")
            continue
        disabled_bites = results.get("bites_disabled")
        if disabled_bites:
            bites_disabled = disabled_bites
        categories.append(category)
    if dataset.number_of_resources() == 0:
        logger.warning(f"{countryname} has no data!")
        return None, None, None, None
    dataset.quickcharts_resource_last()
    notes = [
        f"{indicatorsetdisplayname} for {countryname}.\n\n",
        f"Contains data from the FAOSTAT [bulk data service]({filelist_url})",
    ]
    if len(categories) == 1:
        notes.append(".")
    else:
        notes.append(f" covering the following categories: {', '.join(categories)}")
    dataset["notes"] = "".join(notes)

    showcase = Showcase({
        "name": f"{slugified_name}-showcase",
        "title": title,
        "notes": f"{indicatorsetname} Data Dashboard for {countryname}",
        "url": f"{showcase_base_url}{countryiso}",
        "image_url": "https://pbs.twimg.com/profile_images/1375385494167691269/Bc49-Yx8_400x400.jpg",
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled, qc_indicators
def generate_dataset_resources_and_showcase(pop_types, today):
    title = 'Energy consumption of refugees and displaced people'
    slugified_name = slugify(title.lower())
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('0c6bf79f-504c-4ba5-9fdf-c8cc893c8b2f')
    dataset.set_dataset_date_from_datetime(today)
    dataset.set_expected_update_frequency('Every month')
    dataset.add_other_location('world')
    tags = ['HXL', 'energy', 'refugees', 'internally displaced persons - idp']
    dataset.add_tags(tags)

    resources = list()
    for pop_type in pop_types:
        resource_data = {
            'name': '%s_consumption.csv' % pop_type.lower().replace(' ', '_'),
            'description': '%s %s' % (pop_type, title.lower()),
            'format': 'csv'
        }
        resources.append(Resource(resource_data))
    resource_data = {
        'name': 'population.csv',
        'description': 'UNHCR displaced population totals',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))
    resource_data = {
        'name': 'keyfigures_disagg.csv',
        'description': 'Disaggregated MEI Key Figures',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))
    resource_data = {
        'name': 'keyfigures.csv',
        'description': 'MEI Key Figures',
        'format': 'csv'
    }
    resources.append(Resource(resource_data))

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Energy services for refugees and displaced people',
        'notes': 'Click the image on the right to go to the energy services model',
        'url': 'http://www.sciencedirect.com/science/article/pii/S2211467X16300396',
        'image_url': 'https://ars.els-cdn.com/content/image/X2211467X.jpg'
    })
    showcase.add_tags(tags)
    return dataset, resources, showcase
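# A minimal sketch of wiring up the returned resources (assumptions: the csv files
# named in each resource have already been written locally by earlier steps, and
# the pop_types list is a hypothetical example).
from datetime import datetime

def run_energy():
    pop_types = ['Refugees', 'IDPs']  # hypothetical
    dataset, resources, showcase = generate_dataset_resources_and_showcase(
        pop_types, datetime.utcnow())
    for resource in resources:
        resource.set_file_to_upload(resource['name'])  # assumes each csv exists locally
    dataset.add_update_resources(resources)
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)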
def generate_dataset_and_showcase(folder, country, countrydata, qc_rows, headers,
                                  resources, fields):
    countryiso = country["iso3"]
    countryname = country["countryname"]
    title_text = "Data on forcibly displaced populations and stateless persons"
    if countryname == "World":
        title = f"{title_text} (Global)"
    else:
        title = f"{countryname} - {title_text}"
    logger.info(f"Creating dataset: {title}")
    slugified_name = slugify(f"UNHCR Population Data for {countryiso}").lower()
    dataset = Dataset({"name": slugified_name, "title": title})
    dataset.set_maintainer("8d70b12b-7247-48d2-b426-dbb4bf82eb7c")
    dataset.set_organization("abf4ca86-8e69-40b1-92f7-71509992be88")
    dataset.set_expected_update_frequency("Every six months")
    dataset.set_subnational(True)
    if countryiso == WORLD:
        dataset.add_other_location("world")
    else:
        # Check for unknown country names
        try:
            dataset.add_country_location(countryiso)
        except HDXError:
            logger.error(f"{countryname} ({countryiso}) not recognised!")
            return None, None, None
    tags = ["hxl", "refugees", "asylum", "population"]
    dataset.add_tags(tags)

    # Filter the quick chart data to only include the relevant data for the current country
    qcRowSubset = SubsetQuickChartData(country, qc_rows)

    def process_dates(row):
        year = int(row["Year"])
        startdate = datetime(year, 1, 1)
        # For mid-year data it should be 30-June...
        # enddate = datetime(year, 12, 31)
        if IS_ASR is False and year == LATEST_YEAR:
            enddate = datetime(year, 6, 30)
        else:
            enddate = datetime(year, 12, 31)
        return {"startdate": startdate, "enddate": enddate}

    earliest_startdate = None
    latest_enddate = None
    for resource_name, resource_rows in countrydata.items():
        resource_id = "_".join(resource_name.split("_")[:-1])
        originating_residing = resource_name.split("_")[-1]  # originating or residing
        record = resources[resource_id]
        if countryiso == WORLD:
            # refugees and asylum applicants contain the same data for WORLD
            if originating_residing == "originating":
                continue
        format_parameters = dict(countryiso=countryiso.lower(), countryname=countryname)
        filename = f"{resource_name}_{countryiso}.csv"
        resourcedata = {
            "name": record[originating_residing]["title"].format(**format_parameters),
            "description": record[originating_residing]["description"].format(**format_parameters),
        }
        resourcedata["name"] = resourcedata["name"].replace("residing in World", "(Global)")
        rowit = RowIterator(headers[resource_name], resource_rows).with_fields(fields)
        success, results = dataset.generate_resource_from_iterator(
            rowit.headers(),
            rowit,
            rowit.hxltags_mapping(),
            folder,
            filename,
            resourcedata,
            date_function=process_dates,
            encoding="utf-8",
        )
        if success is False:
            logger.warning(f"{countryname} - {resource_name} has no data!")
        else:
            startdate = results["startdate"]
            if earliest_startdate is None or startdate < earliest_startdate:
                earliest_startdate = startdate
            enddate = results["enddate"]
            if latest_enddate is None or enddate > latest_enddate:
                latest_enddate = enddate
    if len(dataset.get_resources()) == 0:
        logger.error(f"{countryname} has no data!")
        return None, None, None
    dataset.set_date_of_dataset(earliest_startdate, latest_enddate)

    bites_disabled = [True, True, True]
    if countryiso != WORLD:
        filename = "qc_data.csv"
        resourcedata = {
            "name": filename,
            "description": f"QuickCharts data for {countryname}",
        }
        rowit = (ListIterator(
            data=list(qcRowSubset.values()),
            headers=[
                "Year",
                "ISO3CoO",
                "CoO_name",
                "ISO3CoA",
                "CoA_name",
                "Displaced From",
                "Displaced Stateless Within",
                "Displaced Stateless From",
            ],
        ).auto_headers().to_list_iterator())
        years = sorted(set(rowit.column("Year")))[-10:]  # Last 10 years
        headers = rowit.headers()
        rowit = (
            rowit.select(lambda row, years=years: row.get("Year") in years)  # Restrict data to only last 10 years
            .with_sum_field(
                "Displaced From",
                "#affected+displaced+outgoing",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "VDA")) and x.endswith("_outgoing")],
            ).with_sum_field(
                "Displaced Stateless Within",
                "#affected+displaced+stateless+incoming",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "IDP", "VDA", "STA")) and x.endswith("_incoming")],
            ).with_sum_field(
                "Displaced Stateless From",
                "#affected+displaced+stateless+outgoing",
                [x for x in headers
                 if x.startswith(("REF", "ASY", "IDP", "VDA", "STA")) and x.endswith("_outgoing")],
            ).with_fields(fields))
        for row in rowit:
            if (row["Country of Origin Code"] == countryiso
                    and row["Displaced From"] > 0):
                bites_disabled[0] = False
            if row["Year"] != years[-1]:
                continue
            if (row["Country of Asylum Code"] == countryiso
                    and row["Displaced Stateless Within"] > 0):
                bites_disabled[1] = False
            if (row["Country of Origin Code"] == countryiso
                    and row["Displaced Stateless From"] > 0):
                bites_disabled[2] = False
        rowit.reset()
        success, results = dataset.generate_resource_from_iterator(
            rowit.headers(),
            rowit,
            rowit.hxltags_mapping(),
            folder,
            filename,
            resourcedata,
            date_function=process_dates,
            encoding="utf-8",
        )
        if success is False:
            logger.warning(f"QuickCharts {countryname} - {filename} has no data!")

    showcase = Showcase({
        "name": f"{slugified_name}-showcase",
        "title": title,
        "notes": f"UNHCR Population Data Dashboard for {countryname}",
        "url": "https://www.unhcr.org/refugee-statistics/",
        "image_url": "https://www.unhcr.org/assets/img/unhcr-logo.png",
    })
    showcase.add_tags(tags)
    return dataset, showcase, bites_disabled
def test_tags(self, configuration):
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    assert showcase.get_tags() == ['economy', 'health']
    showcase.add_tag('wash')
    assert showcase.get_tags() == ['economy', 'health', 'wash']
    showcase.add_tags(['sanitation'])
    assert showcase.get_tags() == ['economy', 'health', 'wash', 'sanitation']
    result = showcase.remove_tag('wash')
    assert result is True
    assert showcase.get_tags() == ['economy', 'health', 'sanitation']
    showcase['tags'] = None
    result = showcase.remove_tag('wash')
    assert result is False
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('TEST1')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
def generate_datasets_and_showcases(downloader, folder, indicatorname,
                                    indicatortypedata, countriesdata, showcase_base_url):
    dataset_template = Dataset()
    dataset_template.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset_template.set_organization('ed727a5b-3e6e-4cd6-b97e-4a71532085e6')
    dataset_template.set_expected_update_frequency('Every year')
    dataset_template.set_subnational(False)
    tags = ['hxl', indicatorname.lower()]
    dataset_template.add_tags(tags)

    earliest_year = 10000
    latest_year = 0
    countrycode = None
    iso3 = None
    countryname = None
    rows = None
    datasets = list()
    showcases = list()

    def output_csv():
        if rows is None:
            return
        headers = deepcopy(downloader.response.headers)
        for i, header in enumerate(headers):
            if 'year' in header.lower():
                headers.insert(i, 'EndYear')
                headers.insert(i, 'StartYear')
                break
        headers.insert(0, 'Iso3')
        hxlrow = dict()
        for header in headers:
            hxlrow[header] = hxltags.get(header, '')
        rows.insert(0, hxlrow)
        filepath = join(folder, '%s_%s.csv' % (indicatorname, countrycode))
        write_list_to_csv(rows, filepath, headers=headers)
        ds = datasets[-1]
        ds.set_dataset_year_range(earliest_year, latest_year)
        ds.resources[0].set_file_to_upload(filepath)

    for row in downloader.get_tabular_rows(indicatortypedata['FileLocation'],
                                           dict_rows=True, headers=1, format='csv',
                                           encoding='WINDOWS-1252'):
        newcountry = row['Area Code']
        if newcountry != countrycode:
            output_csv()
            rows = None
            countrycode = newcountry
            result = countriesdata.get(countrycode)
            if result is None:
                logger.warning('Ignoring %s' % countrycode)
                continue
            iso3, cn = result
            countryname = Country.get_country_name_from_iso3(iso3)
            if countryname is None:
                logger.error('Missing country %s: %s, %s' % (countrycode, cn, iso3))
                continue
            rows = list()
            title = '%s - %s Indicators' % (countryname, indicatorname)
            logger.info('Generating dataset: %s' % title)
            name = 'FAOSTAT %s indicators for %s' % (countryname, indicatorname)
            slugified_name = slugify(name).lower()
            dataset = Dataset(deepcopy(dataset_template.data))
            dataset['name'] = slugified_name
            dataset['title'] = title
            dataset.update_from_yaml()
            dataset.add_country_location(countryname)
            earliest_year = 10000
            latest_year = 0
            resource = Resource({'name': title, 'description': ''})
            resource.set_file_type('csv')
            dataset.add_update_resource(resource)
            datasets.append(dataset)
            showcase = Showcase({
                'name': '%s-showcase' % slugified_name,
                'title': title,
                'notes': dataset['notes'],
                'url': '%s%s' % (showcase_base_url, countrycode),
                'image_url': 'http://www.fao.org/uploads/pics/food-agriculture.png'
            })
            showcase.add_tags(tags)
            showcases.append(showcase)
        row['Iso3'] = iso3
        row['Area'] = countryname
        year = row['Year']
        if '-' in year:
            years = year.split('-')
            row['StartYear'] = years[0]
            row['EndYear'] = years[1]
        else:
            years = [year]
            row['StartYear'] = year
            row['EndYear'] = year
        for year in years:
            year = int(year)
            if year < earliest_year:
                earliest_year = year
            if year > latest_year:
                latest_year = year
        if rows is not None:
            rows.append(row)
    output_csv()
    return datasets, showcases
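# A minimal sketch for pairing the returned datasets and showcases (they are
# parallel lists, since the generator above appends to both together; assumes
# Configuration.create has been run).
def run_faostat(downloader, folder, indicatorname, indicatortypedata,
                countriesdata, showcase_base_url):
    datasets, showcases = generate_datasets_and_showcases(
        downloader, folder, indicatorname, indicatortypedata,
        countriesdata, showcase_base_url)
    for dataset, showcase in zip(datasets, showcases):
        dataset.create_in_hdx()
        showcase.create_in_hdx()
        showcase.add_dataset(dataset)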
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
def test_create_in_hdx(self, configuration, post_create):
    showcase = Showcase()
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase['id'] = '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase_data['title'] = 'MyShowcase2'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data['title'] = 'MyShowcase3'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
def generate_joint_dataset_and_showcase(wfpfood_url, downloader, folder, countriesdata):
    """Generate a single joint dataset and showcase containing data for all countries."""
    title = 'Global Food Prices Database (WFP)'
    logger.info('Creating joint dataset: %s' % title)
    slugified_name = 'wfp-food-prices'

    df = joint_dataframe(wfpfood_url, downloader, countriesdata)
    if len(df) <= 1:
        logger.warning('Dataset "%s" is empty' % title)
        return None, None

    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer("9957c0e9-cd38-40f1-900b-22c91276154b")  # Orest Dubay
    # dataset.set_maintainer("154de241-38d6-47d3-a77f-0a9848a61df3")
    dataset.set_organization("3ecac442-7fed-448d-8f78-b385ef6f84e7")
    maxmonth = (100 * df.mp_year + df.mp_month).max() % 100
    dataset.set_dataset_date("%04d-01-01" % df.mp_year.min(),
                             "%04d-%02d-15" % (df.mp_year.max(), maxmonth),
                             "%Y-%m-%d")
    dataset.set_expected_update_frequency("weekly")
    dataset.add_country_locations(sorted(df.adm0_name.unique()))
    dataset.add_tags(tags)

    file_csv = join(folder, "WFPVAM_FoodPrices.csv")
    df.to_csv(file_csv, index=False)
    resource = Resource({
        'name': title,
        'description': "World Food Programme – Food Prices Data Source: WFP Vulnerability Analysis and Mapping (VAM)."
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Global Food Prices',
        'notes': "Interactive data visualisation of WFP's Food Market Prices dataset",
        'url': "https://data.humdata.org/organization/wfp#interactive-data",
        'image_url': "https://docs.humdata.org/wp-content/uploads/wfp_food_prices_data_viz.gif"
    })
    showcase.add_tags(tags)

    dataset.update_from_yaml()
    dataset['notes'] = dataset['notes'] % 'Global Food Prices data from the World Food Programme covering'
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)
    dataset.get_resource().create_datastore_from_yaml_schema(
        yaml_path="wfp_food_prices.yml", path=file_csv)
    logger.info('Finished joint dataset')

    return dataset, showcase
def generate_dataset_and_showcase(folder, country, countrydata, headers):
    countryiso = country['iso3']
    countryname = country['countryname']
    title = '%s - Conflict Data' % countryname
    logger.info('Creating dataset: %s' % title)
    slugified_name = slugify('UCDP Data for %s' % countryname).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('196196be-6037-4488-8b71-d786adf4c081')
    dataset.set_organization('hdx')
    dataset.set_expected_update_frequency('As needed')
    dataset.set_subnational(True)
    dataset.add_country_location(countryiso)
    tags = ['hxl', 'violence and conflict', 'protests', 'security incidents']
    dataset.add_tags(tags)

    filename = 'conflict_data_%s.csv' % countryiso
    resourcedata = {
        'name': 'Conflict Data for %s' % countryname,
        'description': 'Conflict data with HXL tags'
    }

    def process_year(years, row):
        start_year = int(row['date_start'][:4])
        end_year = int(row['date_end'][:4])
        years.add(start_year)
        years.add(end_year)
        row['start_year'] = start_year
        row['end_year'] = end_year

    quickcharts = {
        'cutdown': 2,
        'cutdownhashtags': ['#date+year+end', '#adm1+name', '#affected+killed']
    }
    success, results = dataset.generate_resource_from_download(
        headers, countrydata, hxltags, folder, filename, resourcedata,
        year_function=process_year, quickcharts=quickcharts)
    if success is False:
        logger.warning('%s has no data!' % countryname)
        return None, None

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': title,
        'notes': 'Conflict Data Dashboard for %s' % countryname,
        'url': 'https://ucdp.uu.se/#country/%s' % countrydata[0]['country_id'],
        'image_url': 'https://pbs.twimg.com/profile_images/832251660718178304/y-LWa5iK_200x200.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase
def generate_dataset_and_showcase(wfpfood_url, downloader, folder, countrydata, shortcuts):
    """Generate datasets and showcases for each country."""
    title = '%s - Food Prices' % countrydata['name']
    logger.info('Creating dataset: %s' % title)
    # Example name which should be unique, so can include organisation name and country
    name = 'WFP food prices for %s' % countrydata['name']
    slugified_name = slugify(name).lower()

    df = read_dataframe(wfpfood_url, downloader, countrydata)
    if len(df) <= 1:
        logger.warning('Dataset "%s" is empty' % title)
        return None, None

    dataset = Dataset({
        'name': slugified_name,
        'title': title,
        "dataset_preview": "resource_id"
    })
    dataset.set_maintainer("9957c0e9-cd38-40f1-900b-22c91276154b")  # Orest Dubay
    # dataset.set_maintainer("154de241-38d6-47d3-a77f-0a9848a61df3")
    dataset.set_organization("3ecac442-7fed-448d-8f78-b385ef6f84e7")
    dataset.set_dataset_date(df.loc[1:].date.min(), df.loc[1:].date.max(), "%Y-%m-%d")
    dataset.set_expected_update_frequency("weekly")
    dataset.add_country_location(countrydata["name"])
    dataset.set_subnational(True)
    dataset.add_tags(tags)
    dataset.add_tag('hxl')

    file_csv = join(folder, "WFP_food_prices_%s.csv" % countrydata["name"].replace(" ", "-"))
    df.to_csv(file_csv, index=False)
    resource = Resource({
        'name': title,
        "dataset_preview_enabled": "False",
        'description': "Food prices data with HXL tags"
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    df1 = quickchart_dataframe(df, shortcuts)
    file_csv = join(folder, "WFP_food_median_prices_%s.csv" % countrydata["name"].replace(" ", "-"))
    df1.to_csv(file_csv, index=False)
    resource = Resource({
        'name': '%s - Food Median Prices' % countrydata['name'],
        "dataset_preview_enabled": "True",
        'description': """Food median prices data with HXL tags.

The median of all prices for a given commodity observed on different markets is shown, together with the market where it was observed.

Data are shortened in multiple ways:

- Rather than prices on all markets, only the median price across all markets is shown, together with the market where it has been observed.
- Only food commodities are displayed (non-food commodities like fuel and wages are not shown).
- Only data after %s are shown. Missing data are interpolated.
- A column with shorter commodity names "cmnshort" is available to be used as chart labels.
- Units are adapted and prices are rescaled in order to yield comparable values (so that they can be displayed and compared in a single chart). The scaling factor is present in the scaling column. A label with the full commodity name and unit (with scale if applicable) is in the column "label".

This reduces the amount of data and allows cleaner charts.
""" % (df1.loc[1:].date.min())
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': title + " showcase",
        'notes': countrydata["name"] + " food prices data from World Food Programme displayed through VAM Economic Explorer",
        'url': "http://dataviz.vam.wfp.org/economic_explorer/prices?adm0=" + countrydata["code"],
        'image_url': "http://dataviz.vam.wfp.org/_images/home/economic_2-4.jpg"
    })
    showcase.add_tags(tags)
    return dataset, showcase
def test_create_in_hdx(self, configuration, post_create):
    showcase = Showcase()
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase['id'] = 'TEST1'
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    showcase_data['title'] = 'MyShowcase2'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
    showcase_data['title'] = 'MyShowcase3'
    showcase = Showcase(showcase_data)
    with pytest.raises(HDXError):
        showcase.create_in_hdx()
def make_hdx_entries(start_date, **params):
    logger.info('Adding any datasets created or updated after %s' % start_date.date().isoformat())
    # Connect to the database
    connection = pymysql.connect(**params)
    try:
        with connection.cursor() as cursor:
            # Read all countries
            sql = "SELECT * FROM `area`"
            cursor.execute(sql)
            unosatCountryCodes = dict()
            for unosatCountryCode in cursor:
                unosatCountryCodes[unosatCountryCode['id_area']] = unosatCountryCode['area_iso3']
            # Read multiple records
            sql = "SELECT * FROM `product` WHERE NOT (GDB_Link LIKE '' AND SHP_Link LIKE '') AND (product_archived IS FALSE) AND (product_created>%s or updated>%s)"
            cursor.execute(sql, (start_date, start_date))
            if not cursor.rowcount:
                raise UNOSATError('No db results found')
            batch = get_uuid()
            for unosatDBEntry in cursor:
                if not unosatDBEntry:
                    raise UNOSATError('Empty row in db!')
                productID = str(unosatDBEntry['id_product'])
                logger.info('Processing UNOSAT product %s' % productID)
                logger.debug(unosatDBEntry)
                id_area = unosatDBEntry['id_area']
                iso3 = unosatCountryCodes[id_area]
                product_glide = unosatDBEntry['product_glide']
                # logger.info('product_glide = %s' % product_glide)
                typetag = product_glide[:2]
                product_description = unosatDBEntry['product_description']
                if '-' in product_glide:
                    glideiso3 = product_glide.split('-')[3]
                    product_description = '**Glide code: %s** %s' % (product_glide, product_description)
                else:
                    glideiso3 = product_glide[10:13]
                    product_description = '**UNOSAT code: %s** %s' % (product_glide, product_description)
                if iso3 != glideiso3:
                    raise UNOSATError('UNOSAT id_area=%s, area_iso3=%s does not match glide iso3=%s'
                                      % (id_area, iso3, glideiso3))
                # Dataset variables
                title = unosatDBEntry['product_title']
                slugified_name = slugify(title)
                if len(slugified_name) > 90:
                    slugified_name = slugified_name.replace('satellite-detected-', '')
                    slugified_name = slugified_name.replace('estimation-of-', '')
                    slugified_name = slugified_name.replace('geodata-of-', '')[:90]
                event_type = standardEventTypesDict[typetag]
                tags = ['geodata']
                if event_type:
                    tags.append(event_type)
                dataset = Dataset({
                    'name': slugified_name,
                    'title': title,
                    'notes': product_description
                })
                dataset.set_maintainer('83fa9515-3ba4-4f1d-9860-f38b20f80442')
                dataset.add_country_location(iso3)
                dataset.add_tags(tags)
                dataset.set_expected_update_frequency('Never')
                dataset.set_dataset_date_from_datetime(unosatDBEntry['product_created'])
                gdb_link = unosatDBEntry['GDB_Link']
                bitsgdb = gdb_link.split('/')
                shp_link = unosatDBEntry['SHP_Link']
                bitsshp = shp_link.split('/')
                resources = [{
                    'name': bitsgdb[-1],
                    'format': 'zipped geodatabase',
                    'url': gdb_link,
                    'description': 'Zipped geodatabase',
                }, {
                    'name': bitsshp[-1],
                    'format': 'zipped shapefile',
                    'url': shp_link,
                    'description': 'Zipped shapefile',
                }]
                dataset.add_update_resources(resources)
                dataset.update_from_yaml()
                showcase = Showcase({
                    'name': '%s-showcase' % slugified_name,
                    'title': 'Static PDF Map',
                    'notes': 'Static viewing map for printing.',
                    'url': 'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s'
                           % (unosatDBEntry['product_folder'], unosatDBEntry['product_url1']),
                    'image_url': 'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s'
                                 % (unosatDBEntry['product_folder'], unosatDBEntry['product_img'])
                })
                showcase.add_tags(tags)
                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='UNOSAT',
                                      batch=batch)
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)
                with open('publishlog.txt', 'a+') as f:
                    f.write('%s,%s\n' % (productID, dataset.get_hdx_url()))
    finally:
        connection.close()
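# A minimal sketch of calling make_hdx_entries. The rows above are indexed by
# column name (row['id_area'] etc.), so the connection must use pymysql's
# DictCursor; the credentials below are hypothetical placeholders.
import pymysql
from datetime import datetime

def run_unosat():
    params = {
        'host': 'localhost',      # hypothetical
        'user': 'unosat',         # hypothetical
        'password': 'changeme',   # hypothetical
        'db': 'unosat',           # hypothetical
        'cursorclass': pymysql.cursors.DictCursor,  # required for row['...'] access
    }
    make_hdx_entries(datetime(2019, 1, 1), **params)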
def test_delete_from_hdx(self, configuration, post_delete):
    showcase = Showcase.read_from_hdx('TEST1')
    showcase.delete_from_hdx()
    del showcase['id']
    with pytest.raises(HDXError):
        showcase.delete_from_hdx()
def test_update_in_hdx(self, configuration, post_update):
    showcase = Showcase()
    showcase['id'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase['title'] = 'LALA'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase = Showcase.read_from_hdx('05e392bf-04e0-4ca6-848c-4e87bba10746')
    assert showcase['id'] == '05e392bf-04e0-4ca6-848c-4e87bba10746'
    assert showcase['title'] == 'MyShowcase1'
    showcase['name'] = 'TEST1'
    showcase['notes'] = 'lalalala'
    showcase.update_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'lalalala'
    assert showcase['state'] == 'active'
    expected = copy.deepcopy(showcase_resultdict)
    expected['notes'] = 'lalalala'
    expected['name'] = 'TEST1'
    expected['tags'] = [{
        'name': 'economics',
        'vocabulary_id': '4381925f-0ae9-44a3-b30d-cae35598757b'
    }, {
        'name': 'health',
        'vocabulary_id': '4381925f-0ae9-44a3-b30d-cae35598757b'
    }]
    assert showcase.get_old_data_dict() == expected
    showcase['name'] = 'NOTEXIST'
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    del showcase['name']
    with pytest.raises(HDXError):
        showcase.update_in_hdx()
    showcase_data = copy.deepcopy(TestShowcase.showcase_data)
    showcase_data['title'] = 'MyShowcase1'
    showcase_data['name'] = 'TEST1'
    showcase = Showcase(showcase_data)
    showcase.create_in_hdx()
    assert showcase['name'] == 'TEST1'
    assert showcase['notes'] == 'My Showcase'
    assert showcase['state'] == 'active'
def generate_dataset_and_showcase(self, countryiso3, folder):
    countryname = Country.get_country_name_from_iso3(countryiso3)
    title = f'{countryname} - Food Prices'
    logger.info(f'Creating dataset: {title}')
    name = f'WFP food prices for {countryname}'
    slugified_name = slugify(name).lower()
    dataset = Dataset({
        'name': slugified_name,
        'title': title,
    })
    dataset.set_maintainer('f1921552-8c3e-47e9-9804-579b14a83ee3')
    dataset.set_organization('3ecac442-7fed-448d-8f78-b385ef6f84e7')
    dataset.set_expected_update_frequency('weekly')
    dataset.add_country_location(countryname)
    dataset.set_subnational(True)
    tags = ['commodities', 'prices', 'markets', 'hxl']
    dataset.add_tags(tags)

    prices_data = self.get_list('MarketPrices/PriceMonthly', countryiso3)
    if not prices_data:
        logger.info(f'{countryiso3} has no prices data!')
        return None, None, None
    market_to_adm = dict()
    for market in self.get_list('Markets/List', countryiso3):
        market_to_adm[market['marketId']] = (market['admin1Name'], market['admin2Name'],
                                             market['marketLatitude'], market['marketLongitude'])

    rows = dict()
    sources = dict()
    markets = dict()
    for price_data in prices_data:
        if price_data['commodityPriceFlag'] not in ('actual', 'aggregate'):
            continue
        date = price_data['commodityPriceDate']
        category = self.commodity_to_category[price_data['commodityID']]
        market = price_data['marketName']
        if market == 'National Average':
            adm1 = adm2 = lat = lon = ''
        else:
            market_id = price_data['marketID']
            if market_id in market_to_adm:
                adm1, adm2, lat, lon = market_to_adm[market_id]
            else:
                adm1 = adm2 = lat = lon = ''
        orig_source = price_data['commodityPriceSourceName'].replace('M/o', 'Ministry of').replace('+', '/')
        regex = r'Government.*,(Ministry.*)'
        match = re.search(regex, orig_source)
        if match:
            split_sources = [match.group(1)]
        else:
            split_sources = orig_source.replace(',', '/').replace(';', '/').split('/')
        for source in split_sources:
            source = source.strip()
            if not source:
                continue
            if source[-1] == '.':
                source = source[:-1]
            source_lower = source.lower()
            if 'mvam' in source_lower and len(source_lower) <= 8:
                source = 'WFP mVAM'
            elif '?stica' in source:
                source = source.replace('?stica', 'ística')
            source_lower = source.lower()
            if not self.match_source(sources.keys(), source_lower):
                sources[source_lower] = source
        commodity = price_data['commodityName']
        unit = price_data['commodityUnitName']
        price = price_data['commodityPrice']
        currency = price_data['currencyName']
        pricetype = price_data['commodityPriceFlag']
        key = date, adm1, adm2, market, category, commodity, unit
        rows[key] = {
            'date': date,
            'adm1name': adm1,
            'adm2name': adm2,
            'market': market,
            'latitude': lat,
            'longitude': lon,
            'category': category,
            'commodity': commodity,
            'unit': unit,
            'currency': currency,
            'pricetype': pricetype,
            'price': price
        }
        if adm1 and adm2 and category:
            adm1adm2market = adm1, adm2, market
            commodities = markets.get(adm1adm2market, dict())
            dict_of_lists_add(commodities, (commodity, unit, currency), (date, price))
            markets[adm1adm2market] = commodities
    if not rows:
        logger.info(f'{countryiso3} has no prices!')
        return None, None, None

    number_market = list()
    for key, commodities in markets.items():
        number_market.append((len(commodities), key))
    number_market = sorted(number_market, reverse=True)
    qc_indicators = list()
    qc_rows = [qc_hxltags]
    chosen_commodities = set()
    # Go through markets starting with the one with most commodities
    for _, adm1adm2market in number_market:
        commodities = markets[adm1adm2market]
        number_commodity = list()
        for commodityunitcurrency, details in commodities.items():
            number_commodity.append((len(details), commodityunitcurrency))
        number_commodity = sorted(number_commodity, reverse=True)
        index = 0
        # Pick the commodity with most rows that has not already been used for another market
        commodity, unit, currency = number_commodity[index][1]
        while commodity in chosen_commodities:
            index += 1
            if index == len(number_commodity):
                commodity, unit, currency = number_commodity[0][1]
                break
            commodity, unit, currency = number_commodity[index][1]
        adm1, adm2, market = adm1adm2market
        code = f'{adm1}-{adm2}-{market}-{commodity}-{unit}-{currency}'
        for date, price in sorted(commodities[(commodity, unit, currency)]):
            qc_rows.append({'date': date, 'code': code, 'price': price})
        chosen_commodities.add(commodity)
        marketname = market
        if adm2 != market:
            marketname = f'{adm2}/{marketname}'
        if adm1 != adm2:
            marketname = f'{adm1}/{marketname}'
        qc_indicators.append({
            'code': code,
            'title': f'Price of {commodity} in {market}',
            'unit': f'Currency {currency}',
            'description': f'Price of {commodity} ({currency}/{unit}) in {marketname}',
            'code_col': '#meta+code',
            'value_col': '#value',
            'date_col': '#date'
        })
        if len(qc_indicators) == 3:
            break
    dataset['dataset_source'] = ', '.join(sorted(sources.values()))

    filename = f'wfp_food_prices_{countryiso3.lower()}.csv'
    resourcedata = {
        'name': title,
        'description': 'Food prices data with HXL tags',
        'format': 'csv'
    }
    rows = [rows[key] for key in sorted(rows)]
    dataset.generate_resource_from_iterator(headers, rows, hxltags, folder, filename,
                                            resourcedata, datecol='date')
    filename = f'wfp_food_prices_{countryiso3.lower()}_qc.csv'
    resourcedata = {
        'name': f'QuickCharts: {title}',
        'description': 'Food prices QuickCharts data with HXL tags',
        'format': 'csv'
    }
    dataset.generate_resource_from_rows(folder, filename, qc_rows, resourcedata,
                                        headers=list(qc_hxltags.keys()))

    showcase = Showcase({
        'name': f'{slugified_name}-showcase',
        'title': f'{title} showcase',
        'notes': f'{countryname} food prices data from World Food Programme displayed through VAM Economic Explorer',
        'url': f'http://dataviz.vam.wfp.org/economic_explorer/prices?iso3={countryiso3}',
        'image_url': 'http://dataviz.vam.wfp.org/_images/home/3_economic.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase, qc_indicators
def generate_dataset_and_showcases(downloader, countryiso, indicator_metadata, countryalias):
    """Parse json of the form:
    {'id': '1482',
     'title': 'The spatial distribution of population in 2000, Zimbabwe',
     'desc': 'Estimated total number of people per grid-cell...',
     'doi': '10.5258/SOTON/WP00645', 'date': '2018-11-01', 'popyear': '2000',
     'citation': 'WorldPop',
     'data_file': 'GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif',
     'archive': 'N', 'public': 'Y',
     'source': 'WorldPop, University of Southampton, UK',
     'data_format': 'Geotiff',
     'author_email': '*****@*****.**', 'author_name': 'WorldPop',
     'maintainer_name': 'WorldPop', 'maintainer_email': '*****@*****.**',
     'project': 'Population', 'category': 'Global per country 2000-2020',
     'gtype': 'Population', 'continent': 'Africa', 'country': 'Zimbabwe',
     'iso3': 'ZWE',
     'files': ['ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020/2000/ZWE/zwe_ppp_2000.tif'],
     'url_img': 'https://www.worldpop.org/tabs/gdata/img/1482/zwe_ppp_wpgp_2000_Image.png',
     'organisation': 'WorldPop, University of Southampton, UK, www.worldpop.org',
     'license': 'https://www.worldpop.org/data/licence.txt',
     'url_summary': 'https://www.worldpop.org/geodata/summary?id=1482'}
    """
    allmetadata = dict()
    for subalias in countryalias:
        urls = countryalias[subalias]
        allmetadata_subalias = allmetadata.get(subalias, list())
        for url in urls:
            downloader.download(url)
            json = downloader.get_json()
            data = json["data"]
            if isinstance(data, list):
                allmetadata_subalias.extend(data)
            else:
                allmetadata_subalias.append(data)
        allmetadata[subalias] = allmetadata_subalias
    allmetadatavalues = list(allmetadata.values())
    lastmetadata = allmetadatavalues[0][-1]
    indicator_title = indicator_metadata["title"]
    if countryiso == "World":
        countryname = countryiso
    else:
        countryname = Country.get_country_name_from_iso3(countryiso)
        if not countryname:
            logger.exception(f"ISO3 {countryiso} not recognised!")
            return None, None
    title = f"{countryname} - {indicator_title}"
    slugified_name = slugify(f"WorldPop {indicator_title} for {countryname}").lower()
    logger.info(f"Creating dataset: {title}")
    # The "license" field actually holds a URL - ideally WorldPop would rename it
    licence_url = lastmetadata["license"].lower()
    downloader.download(licence_url)
    licence = downloader.get_text()
    methodologies = list()
    url_imgs = list()
    for allmetadatavalue in allmetadatavalues:
        lastallmetadatavalue = allmetadatavalue[-1]
        methodologies.append(lastallmetadatavalue["desc"])
        url_img = lastallmetadatavalue["url_img"]
        if not url_img:
            # Fall back to the most recent earlier entry that has an image
            for lastallmetadatavalue in reversed(allmetadatavalue[:-1]):
                url_img = lastallmetadatavalue["url_img"]
                if url_img:
                    break
        url_imgs.append(url_img)
    methodology = get_matching_then_nonmatching_text(methodologies)
    dataset = Dataset(
        {
            "name": slugified_name,
            "title": title,
            "notes": f"{indicator_metadata['desc']} \nData for earlier dates is available directly from WorldPop. \n \n{lastmetadata['citation']}",
            "methodology": "Other",
            "methodology_other": methodology,
            "dataset_source": lastmetadata["source"],
            "license_id": "hdx-other",
            "license_other": licence,
            "private": False,
        }
    )
    dataset.set_maintainer("37023db4-a571-4f28-8d1f-15f0353586af")
    dataset.set_organization("3f077dff-1d05-484d-a7c2-4cb620f22689")
    dataset.set_expected_update_frequency("Every year")
    dataset.set_subnational(True)
    try:
        dataset.add_other_location(countryiso)
    except HDXError as e:
        logger.exception(f"{countryname} has a problem! {e}")
        return None, None
    tags = [indicator_metadata["name"].lower(), "geodata"]
    dataset.add_tags(tags)
    earliest_year = 10000
    latest_year = 0
    resources_dict = dict()
    for subalias in allmetadata:
        for metadata in allmetadata[subalias]:
            if metadata["public"].lower() != "y":
                continue
            year = metadata["popyear"]
            if not year:
                year = metadata["date"][:4]
            year = int(year)
            if year > latest_year:
                latest_year = year
            if year < earliest_year:
                earliest_year = year
            for url in sorted(metadata["files"], reverse=True):
                resource_name = url[url.rfind("/") + 1:]
                description = metadata["title"]
                if not re.match(r".*([1-3][0-9]{3})", resource_name):
                    # No year in the filename: add it to the name and description
                    resource_parts = resource_name.split(".")
                    resource_name = f"{resource_parts[0]}_{year}"
                    if len(resource_parts) >= 2:
                        resource_name = f"{resource_name}.{resource_parts[1]}"
                    description = f"{description} in {year}"
                resource = {
                    "name": resource_name,
                    "format": metadata["data_format"],
                    "url": url,
                    "description": description,
                }
                dict_of_lists_add(resources_dict, year, resource)
    if not resources_dict:
        logger.error(f"{title} has no data!")
        return None, None
    # Just get last 5 years of data
    for year in sorted(resources_dict.keys(), reverse=True)[:5]:
        for resource in resources_dict[year]:
            dataset.add_update_resource(resource)
    dataset.set_dataset_year_range(earliest_year, latest_year)
    showcases = list()
    for i, url_img in enumerate(url_imgs):
        if not url_img:
            continue
        allmetadatavalue = allmetadatavalues[i][-1]
        url_summary = allmetadatavalue["url_summary"]
        if i == 0:
            name = f"{slugified_name}-showcase"
        else:
            name = f"{slugified_name}-{i + 1}-showcase"
        showcase = Showcase(
            {
                "name": name,
                "title": f"WorldPop {countryname} {indicator_title} Summary Page",
                "notes": f"Summary for {allmetadatavalue['category']} - {countryname}",
                "url": url_summary,
                "image_url": url_img,
            }
        )
        showcase.add_tags(tags)
        showcases.append(showcase)
    return dataset, showcases
def generate_indicator_datasets_and_showcase(downloader, folder, indicators, tags):
    datasets = dict()
    countriesdata = dict()
    headersdata = dict()
    for indicator in indicators:
        metadata = downloader.download_tabular_key_value(indicator['spreadsheet'])
        name = metadata['Indicator Name']
        title = name
        dataset = get_dataset(title, tags, 'idmc-%s' % name)
        dataset['notes'] = "%s\n\nContains data from IDMC's [Global Internal Displacement Database](http://www.internal-displacement.org/database/displacement-data)." % metadata['Long definition']
        dataset['methodology_other'] = metadata['Statistical concept and methodology']
        dataset['caveats'] = metadata['Limitations and exceptions']
        dataset.add_other_location('world')
        url = indicator['url']
        name = indicator['name']
        path = downloader.download_file(url, folder, '%s.xlsx' % name)
        data = hxl.data(path, allow_local=True)
        headers = data.headers
        hxltags = data.display_tags
        headersdata[name] = headers, hxltags
        years = set()
        rows = [headers, hxltags]
        for row in data:
            newrow = list()
            for hxltag in hxltags:
                newrow.append(row.get(hxltag))
            rows.append(newrow)
            iso3 = row.get('#country+code')
            epcountrydata = countriesdata.get(iso3, dict())
            dict_of_lists_add(epcountrydata, name, row)
            countriesdata[iso3] = epcountrydata
            year = row.get('#date+year')
            if year is None:
                continue
            years.add(year)
        resourcedata = {'name': name, 'description': title}
        filename = '%s.csv' % name
        dataset.generate_resource_from_rows(folder, filename, rows, resourcedata)
        years = sorted(list(years))
        dataset.set_dataset_year_range(years[0], years[-1])
        datasets[name] = dataset
    title = 'IDMC Global Report on Internal Displacement'
    slugified_name = slugify(title).lower()
    showcase = Showcase({
        'name': slugified_name,
        'title': title,
        'notes': 'Click the image on the right to go to the %s' % title,
        'url': 'http://www.internal-displacement.org/global-report/grid2018/',
        'image_url': 'http://www.internal-displacement.org/global-report/grid2018/img/ogimage.jpg'
    })
    showcase.add_tags(tags)
    return datasets, showcase, headersdata, countriesdata
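# Sketch of the shape countriesdata takes above, using a minimal stand-in for
# hdx.utilities.dictandlist.dict_of_lists_add (assumed behavior: append the
# value to the list at the key, creating the list if needed). The toy rows are
# illustrative only.
def dict_of_lists_add(dictionary, key, value):
    dictionary.setdefault(key, list()).append(value)

countriesdata = dict()
toy_rows = [{'#country+code': 'AFG', '#date+year': 2017},
            {'#country+code': 'AFG', '#date+year': 2018}]
for row in toy_rows:
    iso3 = row['#country+code']
    epcountrydata = countriesdata.get(iso3, dict())
    dict_of_lists_add(epcountrydata, 'displacement_data', row)
    countriesdata[iso3] = epcountrydata
print(countriesdata['AFG']['displacement_data'])  # rows grouped by iso3, then indicator name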
def generate_dataset_and_showcase(folder, countryiso, countrydata, qc_indicators):
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Human Development Indicators' % countryname
    slugified_name = slugify('HDRO data for %s' % countryname).lower()
    logger.info('Creating dataset: %s' % title)
    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer('872427e4-7e9b-44d6-8c58-30d5052a00a2')
    dataset.set_organization('89ebe982-abe9-4748-9dde-cf04632757d6')
    dataset.set_expected_update_frequency('Every year')
    dataset.set_subnational(False)
    dataset.add_country_location(countryiso)
    tags = [
        'health', 'education', 'socioeconomic', 'demographics', 'development',
        'indicators', 'hxl'
    ]
    dataset.add_tags(tags)
    filename = 'hdro_indicators_%s.csv' % countryiso
    resourcedata = {
        'name': 'Human Development Indicators for %s' % countryname,
        'description': 'Human development data with HXL tags'
    }
    quickcharts = {
        'hashtag': '#indicator+code',
        'values': [x['code'] for x in qc_indicators],
        'cutdown': 2,
        'cutdownhashtags': ['#indicator+code', '#date+year', '#indicator+value+num']
    }

    def yearcol_function(row):
        result = dict()
        year = row['year']
        if year:
            if len(year) == 9:
                # Year ranges like '2005-2010'
                startyear = year[:4]
                endyear = year[5:]
                result['startdate'], _ = parse_date_range(startyear, date_format='%Y')
                _, result['enddate'] = parse_date_range(endyear, date_format='%Y')
            else:
                result['startdate'], result['enddate'] = parse_date_range(
                    year, date_format='%Y')
        return result

    # hxltags here is presumably a module-level mapping of header to HXL tag
    success, results = dataset.generate_resource_from_iterator(
        countrydata[0].keys(), countrydata, hxltags, folder, filename,
        resourcedata, date_function=yearcol_function, quickcharts=quickcharts)
    if success is False:
        logger.error('%s has no data!' % countryname)
        return None, None, None
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': 'Indicators for %s' % countryname,
        'notes': 'Human Development indicators for %s' % countryname,
        'url': 'http://hdr.undp.org/en/countries/profiles/%s' % countryiso,
        'image_url': 'https://s1.stabroeknews.com/images/2019/12/undp.jpg'
    })
    showcase.add_tags(tags)
    return dataset, showcase, results['bites_disabled']
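# Minimal sketch of what yearcol_function does with single years versus
# 9-character ranges like '2005-2010'. datetime stands in for
# hdx.utilities.dates.parse_date_range, which resolves a '%Y' string to the
# start and end of that year; year_range is illustrative only.
from datetime import datetime

def year_range(year):
    if len(year) == 9:  # e.g. '2005-2010'
        start, end = year[:4], year[5:]
    else:               # e.g. '2010'
        start = end = year
    return datetime(int(start), 1, 1), datetime(int(end), 12, 31)

print(year_range('2005-2010'))  # 2005-01-01 .. 2010-12-31
print(year_range('2010'))       # 2010-01-01 .. 2010-12-31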
def generate_datasets_and_showcase(configuration, base_url, downloader, folder, country, dhstags):
    """Generate national and subnational DHS datasets plus a showcase for one country"""
    countryiso = country['iso3']
    dhscountrycode = country['dhscode']
    countryname = Country.get_country_name_from_iso3(countryiso)
    title = '%s - Demographic and Health Data' % countryname
    logger.info('Creating datasets for %s' % title)
    tags = ['hxl', 'health', 'demographics']
    dataset = get_dataset(countryiso, tags)
    if dataset is None:
        return None, None, None, None
    dataset['title'] = title.replace('Demographic', 'National Demographic')
    slugified_name = slugify('DHS Data for %s' % countryname).lower()
    dataset['name'] = slugified_name
    dataset.set_subnational(False)
    subdataset = get_dataset(countryiso, tags)
    if subdataset is None:
        return None, None, None, None
    subdataset['title'] = title.replace('Demographic', 'Subnational Demographic')
    subslugified_name = slugify('DHS Subnational Data for %s' % countryname).lower()
    subdataset['name'] = subslugified_name
    subdataset.set_subnational(True)
    # description is a notes template defined elsewhere in the scraper; each
    # dataset's notes link to the other dataset
    dataset['notes'] = description % (
        subdataset['title'], configuration.get_dataset_url(subslugified_name))
    subdataset['notes'] = description % (
        dataset['title'], configuration.get_dataset_url(slugified_name))
    bites_disabled = {'national': dict(), 'subnational': dict()}

    def process_national_row(_, row):
        row['ISO3'] = countryiso
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['national'])
        return row

    def process_subnational_row(_, row):
        row['ISO3'] = countryiso
        val = row['CharacteristicLabel']
        if val[:2] == '..':
            val = val[2:]
        row['Location'] = val
        if tagname == 'DHS Quickstats':
            process_quickstats_row(row, bites_disabled['subnational'])
        return row

    years = set()
    subyears = set()
    for dhstag in dhstags:
        tagname = dhstag['TagName'].strip()
        resource_name = '%s Data for %s' % (tagname, countryname)
        resourcedata = {
            'name': resource_name,
            'description': 'HXLated csv containing %s data' % tagname
        }
        url = '%sdata/%s?tagids=%s&breakdown=national&perpage=10000&f=csv' % (
            base_url, dhscountrycode, dhstag['TagID'])
        filename = '%s_national_%s.csv' % (tagname, countryiso)
        _, results = dataset.download_and_generate_resource(
            downloader, url, hxltags, folder, filename, resourcedata,
            header_insertions=[(0, 'ISO3')], row_function=process_national_row,
            yearcol='SurveyYear')
        years.update(results['years'])
        url = url.replace('breakdown=national', 'breakdown=subnational')
        filename = '%s_subnational_%s.csv' % (tagname, countryiso)
        try:
            insertions = [(0, 'ISO3'), (1, 'Location')]
            _, results = subdataset.download_and_generate_resource(
                downloader, url, hxltags, folder, filename, resourcedata,
                header_insertions=insertions,
                row_function=process_subnational_row, yearcol='SurveyYear')
            subyears.update(results['years'])
        except DownloadError as ex:
            cause = ex.__cause__
            if cause is not None:
                # The DHS API raises this when a tag has no subnational data
                if 'Variable RET is undefined' not in str(cause):
                    raise ex
            else:
                raise ex
    if len(dataset.get_resources()) == 0:
        dataset = None
    else:
        set_dataset_date_bites(dataset, years, bites_disabled, 'national')
    if len(subdataset.get_resources()) == 0:
        subdataset = None
    else:
        set_dataset_date_bites(subdataset, subyears, bites_disabled, 'subnational')
    publication = get_publication(base_url, downloader, dhscountrycode)
    showcase = Showcase({
        'name': '%s-showcase' % slugified_name,
        'title': publication['PublicationTitle'],
        'notes': publication['PublicationDescription'],
        'url': publication['PublicationURL'],
        'image_url': publication['ThumbnailURL']
    })
    showcase.add_tags(tags)
    return dataset, subdataset, showcase, bites_disabled
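# Quick check of the CharacteristicLabel cleanup in process_subnational_row:
# subnational labels arrive prefixed with '..', which is stripped into the
# Location column. Toy rows only - not part of the scraper.
for row in ({'CharacteristicLabel': '..Rural'}, {'CharacteristicLabel': 'Urban'}):
    val = row['CharacteristicLabel']
    if val[:2] == '..':
        val = val[2:]
    row['Location'] = val
    print(row['Location'])  # Rural, then Urban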