def get_countries(countries_path, downloader):
    """Download a list of countries and provide mapping if necessary.

    A list of dictionaries is returned, each containing the following keys:
    iso3 - ISO 3 country code
    name - country name
    code - WFP country code

    Args:
        countries_path (str): Path or URL of the countries csv
        downloader (Download): Downloader object used to read tabular rows

    Returns:
        list[dict]: Countries with iso3, name and code keys
    """
    countries = list()
    headers, iterator = downloader.get_tabular_rows(countries_path, headers=1,
                                                    dict_form=True, format='csv')
    for row in iterator:
        wfp_name = row['ADM0_NAME']
        code = row['ADM0_CODE']
        iso3, fuzzy = Country.get_iso3_country_code_fuzzy(wfp_name)
        # Skip rows whose name cannot be resolved to an ISO3 code
        if iso3 is None:
            continue
        # Bug fix: the 'name' key was documented in the docstring but never
        # populated; include the WFP country name alongside iso3 and code.
        countries.append({'iso3': iso3, 'name': wfp_name, 'code': code})
    return countries
def check_domain(ds: pd.DataFrame) -> tuple:
    """If the column domain is Country, check whether any column header is not a country name.

    Args:
        ds: the DataFrame whose column headers are checked

    Returns:
        tuple: (not_a_country, iso_list) where not_a_country lists headers that
            could not be recognized as countries (annotated with their domain
            when one is found) and iso_list holds the ISO3 codes of the
            recognized headers
    """
    not_a_country = []
    iso_list = []
    # Bug fix: ds.columns.name can be None, which previously raised
    # AttributeError on .lower(); a missing name means "not a country domain".
    # Also fixed the return annotation: `[list, list]` is a list literal, not
    # a valid type hint.
    if ds.columns.name and 'country' in ds.columns.name.lower():
        for country in ds.columns:
            iso, fuzzy = Country.get_iso3_country_code_fuzzy(country, use_live=False)
            if iso is None:
                # Header is not a country: try to classify it via the FSP lookup
                country_type = utils.check_country_fsp(country)
                if country_type is None:
                    not_a_country.append(country)
                else:
                    not_a_country.append(
                        (country + " (Domain: " + country_type + ")"))
            else:
                iso_list.append(iso)
    return not_a_country, iso_list
def get_adm(admcol, i):
    """Resolve the admin unit at level i from the current row into adms[i].

    Nested helper: closes over self, row, adms and scrapername from the
    enclosing scope. Returns True only for an exact match against the known
    units in self.adms[i]; otherwise returns False, and adms[i] is set to
    None when no valid match is found at all.
    """
    # admcol may be a template reference resolved to a real header via
    # self.headers — presumably "{{N}}"-style; see match_template (TODO confirm)
    template_string, match_string = match_template(admcol)
    if template_string:
        admcol = self.headers[int(match_string)]
    adm = row[admcol]
    if not adm:
        return False
    adm = adm.strip()
    adms[i] = adm
    # Already a known admin unit at this level: exact match
    if adm in self.adms[i]:
        return True
    exact = False
    if self.admexact:
        # Exact matching requested: discard anything not already known
        adms[i] = None
    else:
        if i == 0:
            # Level 0 is a country: fuzzy-match to an ISO3 code. The second
            # return value is True only when the match was exact.
            adms[i], exact = Country.get_iso3_country_code_fuzzy(adm)
        elif i == 1:
            # Level 1 is an admin1 unit: look up its p-code within the country
            adms[i], exact = self.adminone.get_pcode(
                adms[0], adm, scrapername)
        # Even a fuzzy/p-code match must be one of the known units
        if adms[i] not in self.adms[i]:
            adms[i] = None
    return exact
def get_adm(self, adms, admexact, i, scrapername):
    """Normalize the admin unit at level i in adms, returning whether the match was exact.

    adms[i] is replaced by its canonical form (ISO3 code for level 0, p-code
    for level 1) or set to None when it cannot be matched to a known unit.
    """
    candidate = adms[i]
    # Already canonical: nothing to do
    if candidate in self.adms[i]:
        return True
    if admexact:
        # Strict mode: unknown values are simply discarded
        adms[i] = None
        return False
    exact = False
    if i == 0:
        # Countries: try the explicit mapping first, then fuzzy ISO3 lookup
        mapped = self.adm_mappings[0].get(candidate)
        if mapped:
            adms[i] = mapped
            return True
        adms[i], _ = Country.get_iso3_country_code_fuzzy(candidate)
    elif i == 1:
        # Admin1 units: resolve to a p-code within the country at adms[0]
        adms[i], exact = self.get_pcode(adms[0], candidate, scrapername)
    else:
        adms[i] = None
    # Whatever was resolved must still be a known unit at this level
    if adms[i] not in self.adms[i]:
        adms[i] = None
    return exact
def get_allowed_months(now, num_months=6):
    """Return the set of 'year/month' strings for the num_months preceding now.

    Args:
        now (date/datetime): reference date (only .year and .month are used)
        num_months (int): how many previous months to include (default 6)

    Returns:
        set[str]: e.g. {'2020/2', '2020/1', '2019/12', ...} (months unpadded)
    """
    allowed_months = set()
    for i in range(1, num_months + 1):
        month = now.month - i
        if month > 0:
            allowed_months.add('%d/%d' % (now.year, month))
        else:
            # Bug fix: wrapping into the previous year must ADD the
            # non-positive offset: month == -1 -> 12 + (-1) = November.
            # The original `12 - month` produced invalid months like 13.
            allowed_months.add('%d/%d' % (now.year - 1, 12 + month))
    return allowed_months


def add_food_prices(configuration, countryiso3s, downloader, scrapers=None):
    """Scrape WFP food prices and compute per-country ratios of commodities with abnormal ALPS.

    Args:
        configuration (dict): must contain a 'food_prices' dataset info entry
        countryiso3s (iterable): ISO3 codes to restrict the output to
        downloader (Download): used by read_hdx to fetch the dataset
        scrapers (list|None): if given, run only when one matches 'food_prices'

    Returns:
        tuple: (headers, [ratios], [source tuple]) or three empty lists when skipped
    """
    name = 'food_prices'
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list()
    datasetinfo = configuration[name]
    headers, iterator = read_hdx(downloader, datasetinfo)
    # Only keep rows from the last 6 months relative to module-level `today`
    allowed_months = get_allowed_months(today)
    commods_per_country = dict()
    affected_commods_per_country = dict()
    for row in iterator:
        year_month = '%s/%s' % (row['Year'], row['Month'])
        if year_month not in allowed_months:
            continue
        countryiso, _ = Country.get_iso3_country_code_fuzzy(row['Country'])
        if not countryiso or countryiso not in countryiso3s:
            continue
        # Count commodities per country, and separately those whose ALPS
        # (alert for price spikes) indicator is not 'Normal'
        commods_per_country[countryiso] = commods_per_country.get(
            countryiso, 0) + 1
        if row['ALPS'] != 'Normal':
            affected_commods_per_country[
                countryiso] = affected_commods_per_country.get(countryiso, 0) + 1
    ratios = calculate_ratios(commods_per_country, affected_commods_per_country)
    hxltag = '#value+food+num+ratio'
    logger.info('Processed WFP')
    return [['Food Prices Ratio'], [hxltag]
            ], [ratios], [(hxltag, datasetinfo['date'], datasetinfo['source'],
                           datasetinfo['source_url'])]
def generate_dataset_and_showcase(downloader, countrydata, endpoints_metadata, folder,
                                  merge_resources=True, single_dataset=False,
                                  split_to_resources_by_column="STAT_UNIT",
                                  remove_useless_columns=True):
    """
    https://api.uis.unesco.org/sdmx/data/UNESCO,DEM_ECO/....AU.?format=csv-:-tab-true-y&locale=en&subscription-key=...

    :param downloader: Downloader object
    :param countrydata: Country datastructure from UNESCO API
    :param endpoints_metadata: Endpoint datastructure from UNESCO API
    :param folder: temporary folder
    :param merge_resources: if true, merge resources for all time periods
    :param single_dataset: if true, put all endpoints into a single dataset
    :param split_to_resources_by_column: split data into multiple resources (csv) based on a value in the specified column
    :param remove_useless_columns: if true, drop columns deemed not useful before writing csvs
    :return: generator yielding (dataset, showcase) tuples. It may yield None, None.
    """
    countryiso2 = countrydata['id']
    countryname = countrydata['names'][0]['value']
    logger.info("Processing %s" % countryname)
    # Skip pseudo-countries (aggregates/organisations) by their name prefix
    if countryname[:4] in ['WB: ', 'SDG:', 'MDG:', 'UIS:', 'EFA:'] or \
            countryname[:5] in ['GEMR:', 'AIMS:'] or \
            countryname[:7] in ['UNICEF:', 'UNESCO:']:
        logger.info('Ignoring %s!' % countryname)
        yield None, None
        return
    # Prefer the direct iso2 -> iso3 conversion, fall back to fuzzy name matching
    countryiso3 = Country.get_iso3_from_iso2(countryiso2)
    if countryiso3 is None:
        countryiso3, _ = Country.get_iso3_country_code_fuzzy(countryname)
        if countryiso3 is None:
            logger.exception('Cannot get iso3 code for %s!' % countryname)
            yield None, None
            return
        logger.info('Matched %s to %s!' % (countryname, countryiso3))
    earliest_year = 10000
    latest_year = 0
    if single_dataset:
        # One dataset collects resources from all endpoints
        name = 'UNESCO indicators - %s' % countryname
        dataset, showcase = create_dataset_showcase(
            name, countryname, countryiso2, countryiso3, single_dataset=single_dataset)
        if dataset is None:
            return
    for endpoint in sorted(endpoints_metadata):
        time.sleep(0.2)  # simple rate limiting against the UNESCO API
        indicator, structure_url, more_info_url, dimensions = endpoints_metadata[
            endpoint]
        structure_url = structure_url % countryiso2
        response = load_safely(downloader, '%s%s' % (structure_url, dataurl_suffix))
        # NOTE: shadows the stdlib `json` module name within this function
        json = response.json()
        if not single_dataset:
            # One dataset per endpoint
            name = 'UNESCO %s - %s' % (json["structure"]["name"], countryname)
            dataset, showcase = create_dataset_showcase(
                name, countryname, countryiso2, countryiso3, single_dataset=single_dataset)
            if dataset is None:
                continue
        # Collect available TIME_PERIOD values (year -> actual observation count)
        observations = json['structure']['dimensions']['observation']
        time_periods = dict()
        for observation in observations:
            if observation['id'] == 'TIME_PERIOD':
                for value in observation['values']:
                    time_periods[int(value['id'])] = value['actualObs']
        if len(time_periods) == 0:
            logger.warning('No time periods for endpoint %s for country %s!'
                           % (indicator, countryname))
            continue
        earliest_year = min(earliest_year, *time_periods.keys())
        latest_year = max(latest_year, *time_periods.keys())
        csv_url = '%sformat=csv' % structure_url
        description = more_info_url
        if description != ' ':
            description = '[Info on %s](%s)' % (indicator, description)
        description = 'To save, right click download button & click Save Link/Target As \n%s' % description
        df = None
        for start_year, end_year in chunk_years(time_periods):
            if merge_resources:
                # Download each year chunk and append into a single dataframe
                df1 = download_df(downloader, csv_url, start_year, end_year)
                if df1 is not None:
                    df = df1 if df is None else df.append(df1)
            else:
                # One remote (hotlinked) resource per year chunk
                url_years = '&startPeriod=%d&endPeriod=%d' % (start_year, end_year)
                resource = {
                    'name': '%s (%d-%d)' % (indicator, start_year, end_year),
                    'description': description,
                    'format': 'csv',
                    'url': downloader.get_full_url('%s%s' % (csv_url, url_years))
                }
                dataset.add_update_resource(resource)
        if df is not None:
            # Map STAT_UNIT ids to display names for resource descriptions
            stat = {
                x["id"]: x["name"]
                for d in dimensions if d["id"] == "STAT_UNIT"
                for x in d["values"]
            }
            # Split the merged dataframe into one csv resource per column value
            for value, df_part in split_df_by_column(
                    process_df(df), split_to_resources_by_column):
                # Sanitize the filename: punctuation replaced by dashes
                file_csv = join(
                    folder,
                    ("UNESCO_%s_%s.csv" %
                     (countryiso3, endpoint + ("" if value is None else "_" + value))).replace(
                         " ", "-").replace(":", "-").replace("/", "-").replace(
                             ",", "-").replace("(", "-").replace(")", "-"))
                if remove_useless_columns:
                    df_part = remove_useless_columns_from_df(df_part)
                # Insert HXL hashtags in the first data row of the added columns
                df_part["country-iso3"] = countryiso3
                df_part.iloc[
                    0, df_part.columns.get_loc("country-iso3")] = "#country+iso3"
                df_part["Indicator name"] = value
                df_part.iloc[0, df_part.columns.get_loc("Indicator name"
                                                        )] = "#indicator+name"
                df_part = postprocess_df(df_part)
                df_part.to_csv(file_csv, index=False)
                description_part = stat.get(
                    value, 'Info on %s%s' %
                    ("" if value is None else value + " in ", indicator))
                resource = Resource({
                    'name': value,
                    'description': description_part
                })
                resource.set_file_type('csv')
                resource.set_file_to_upload(file_csv)
                dataset.add_update_resource(resource)
        if not single_dataset:
            # Per-endpoint dataset: yield it now with its own year range
            if dataset is None or len(dataset.get_resources()) == 0:
                logger.error('No resources created for country %s, %s!' %
                             (countryname, endpoint))
            else:
                dataset.set_dataset_year_range(min(time_periods.keys()),
                                               max(time_periods.keys()))
                yield dataset, showcase
    if single_dataset:
        # Combined dataset: yield once, spanning all endpoints' years
        if dataset is None or len(dataset.get_resources()) == 0:
            logger.error('No resources created for country %s!' % (countryname))
        else:
            dataset.set_dataset_year_range(earliest_year, latest_year)
            yield dataset, showcase
def test_get_iso3_country_code(self):
    """Exercise exact and fuzzy ISO3 lookups, including failure and exception modes."""
    # Exact lookups: abbreviations, official and shortened names, iso2 codes
    assert Country.get_iso3_country_code('jpn', use_live=False) == 'JPN'
    assert Country.get_iso3_country_code('Dem. Rep. of the Congo', use_live=False) == 'COD'
    assert Country.get_iso3_country_code('Russian Fed.', use_live=False) == 'RUS'
    assert Country.get_iso3_country_code(
        'Micronesia (Federated States of)', use_live=False) == 'FSM'
    assert Country.get_iso3_country_code('Iran (Islamic Rep. of)', use_live=False) == 'IRN'
    assert Country.get_iso3_country_code('United Rep. of Tanzania', use_live=False) == 'TZA'
    assert Country.get_iso3_country_code('Syrian Arab Rep.', use_live=False) == 'SYR'
    assert Country.get_iso3_country_code('Central African Rep.', use_live=False) == 'CAF'
    assert Country.get_iso3_country_code('Rep. of Korea', use_live=False) == 'KOR'
    assert Country.get_iso3_country_code('St. Pierre and Miquelon', use_live=False) == 'SPM'
    assert Country.get_iso3_country_code('Christmas Isl.', use_live=False) == 'CXR'
    assert Country.get_iso3_country_code('Cayman Isl.', use_live=False) == 'CYM'
    assert Country.get_iso3_country_code('jp', use_live=False) == 'JPN'
    assert Country.get_iso3_country_code('Taiwan (Province of China)', use_live=False) == 'TWN'
    # Fuzzy lookups return (iso3, exact): second element True means exact match
    assert Country.get_iso3_country_code_fuzzy('jpn', use_live=False) == ('JPN', True)
    assert Country.get_iso3_country_code_fuzzy('ZWE', use_live=False) == ('ZWE', True)
    assert Country.get_iso3_country_code_fuzzy('Vut', use_live=False) == ('VUT', True)
    # Unmatchable input: None / (None, False), or raises if exception given
    assert Country.get_iso3_country_code('abc', use_live=False) is None
    with pytest.raises(LocationError):
        Country.get_iso3_country_code('abc', use_live=False, exception=LocationError)
    assert Country.get_iso3_country_code_fuzzy('abc', use_live=False) == (None, False)
    with pytest.raises(LocationError):
        Country.get_iso3_country_code_fuzzy('abc', use_live=False, exception=LocationError)
    # Fuzzy matching of partial / colloquial names
    assert Country.get_iso3_country_code_fuzzy('United Kingdom', use_live=False) == ('GBR', False)
    assert Country.get_iso3_country_code_fuzzy(
        'United Kingdom of Great Britain and Northern Ireland', use_live=False) == ('GBR', True)
    assert Country.get_iso3_country_code_fuzzy('united states', use_live=False) == ('USA', False)
    assert Country.get_iso3_country_code_fuzzy('united states of america', use_live=False) == ('USA', True)
    # Case-insensitive matching
    assert Country.get_iso3_country_code('UZBEKISTAN', use_live=False) == 'UZB'
    assert Country.get_iso3_country_code_fuzzy('UZBEKISTAN', use_live=False) == ('UZB', True)
    # Names only the fuzzy lookup can resolve
    assert Country.get_iso3_country_code('Sierra', use_live=False) is None
    assert Country.get_iso3_country_code_fuzzy('Sierra', use_live=False) == ('SLE', False)
    assert Country.get_iso3_country_code('Venezuela', use_live=False) is None
    assert Country.get_iso3_country_code_fuzzy('Venezuela', use_live=False) == ('VEN', False)
    assert Country.get_iso3_country_code_fuzzy('Heard Isl.', use_live=False) == ('HMD', False)
    assert Country.get_iso3_country_code_fuzzy('Falkland Isl.', use_live=False) == ('FLK', False)
    assert Country.get_iso3_country_code_fuzzy('Czech Republic', use_live=False) == ('CZE', False)
    assert Country.get_iso3_country_code_fuzzy('Czech Rep.', use_live=False) == ('CZE', False)
    assert Country.get_iso3_country_code_fuzzy('Islamic Rep. of Iran', use_live=False) == ('IRN', False)
    # Disambiguation of similarly named countries (the two Congos, the two Koreas)
    assert Country.get_iso3_country_code_fuzzy('Dem. Congo', use_live=False) == ('COD', False)
    assert Country.get_iso3_country_code_fuzzy('Congo, Republic of', use_live=False) == ('COG', False)
    assert Country.get_iso3_country_code_fuzzy('Republic of the Congo', use_live=False) == ('COG', False)
    assert Country.get_iso3_country_code_fuzzy('Vietnam', use_live=False) == ('VNM', False)
    assert Country.get_iso3_country_code_fuzzy('South Korea', use_live=False) == ('KOR', False)
    assert Country.get_iso3_country_code_fuzzy('Korea Republic', use_live=False) == ('KOR', False)
    assert Country.get_iso3_country_code_fuzzy('Dem. Republic Korea', use_live=False) == ('PRK', False)
    assert Country.get_iso3_country_code_fuzzy('North Korea', use_live=False) == ('PRK', False)
    assert Country.get_iso3_country_code_fuzzy(
        'Serbia and Kosovo: S/RES/1244 (1999)', use_live=False) == ('SRB', False)
    assert Country.get_iso3_country_code_fuzzy('U.S. Virgin Islands', use_live=False) == ('VIR', True)
    assert Country.get_iso3_country_code_fuzzy('U.K. Virgin Islands', use_live=False) == ('VGB', False)
    assert Country.get_iso3_country_code_fuzzy('Taiwan', use_live=False) == ('TWN', False)
    # Custom exception classes are honoured
    with pytest.raises(ValueError):
        Country.get_iso3_country_code('abc', use_live=False, exception=ValueError)
    with pytest.raises(ValueError):
        Country.get_iso3_country_code_fuzzy('abc', use_live=False, exception=ValueError)
def main():
    """Search HDX for datasets related to configured crises and write a report to a Google Sheet.

    Reads crisis definitions (start date, countries, tags) from the HDX
    configuration, searches HDX for datasets created before each crisis'
    cut-off date, and rewrites the configured worksheet with one row per
    matching dataset. Relies on the module-level `gsheet_auth` credentials.
    """
    configuration = Configuration.read()
    enddays = configuration['enddays']
    ignore_users = configuration['ignore_users']
    users_scrapers = configuration['users_scrapers']
    spreadsheet_url = configuration['spreadsheet_url']
    sheetname = configuration['sheetname']
    logger.info('> GSheet Credentials: %s' % gsheet_auth)
    users = dict()  # cache of user_id -> display name (used by disabled code below)
    info = json.loads(gsheet_auth)
    scopes = ['https://www.googleapis.com/auth/spreadsheets']
    credentials = service_account.Credentials.from_service_account_info(info, scopes=scopes)
    gc = pygsheets.authorize(custom_credentials=credentials)
    spreadsheet = gc.open_by_url(spreadsheet_url)
    sheet = spreadsheet.worksheet_by_title(sheetname)
    # First row of the sheet defines the column order for the output
    keys = sheet.get_row(1)
    rows = [keys]
    crisisdata = configuration['crisisdata']
    for crisis in crisisdata:
        data = crisisdata[crisis]
        startdate = parse_date(data['startdate'])
        enddate = startdate + timedelta(days=enddays)
        # Build an HDX search query from country groups and topic tags
        searchlist = list()
        for country in data.get('countries', list()):
            iso3, _ = Country.get_iso3_country_code_fuzzy(country)
            searchlist.append('groups:%s' % iso3.lower())
        for tag in data.get('tags', list()):
            searchlist.append('vocab_Topics:"%s"' % tag.lower())
        search_string = 'metadata_created:[2000-01-01T00:00:00.000Z TO %sZ] AND (%s)' % (
            enddate.isoformat(), ' OR '.join(searchlist))
        datasets = Dataset.search_in_hdx(fq=search_string)
        row = {'ID': data['id'], 'Crisis name': crisis}
        count = 0
        largest_activities = 0
        for dataset in datasets:
            metadata_created_str = dataset['metadata_created']
            orgname = dataset['organization']['name']
            metadata_created = parse_date(metadata_created_str)
            new_or_updated = 'new'
            updated_when = ''
            updated_by = ''
            # NOTE: the activity-based "updated" detection below is deliberately
            # disabled; with it off, every dataset is reported as 'new' and
            # largest_activities stays 0.
            # if metadata_created < startdate:
            #     activities = Activity.get_all_activities(id=dataset['id'], limit=10000)
            #     activities_len = len(activities)
            #     if activities_len > largest_activities:
            #         largest_activities = activities_len
            #     found = False
            #     for activity in activities:
            #         timestamp = activity['timestamp']
            #         activity_date = parse_date(timestamp)
            #         if startdate < activity_date < enddate:
            #             new_or_updated = 'updated'
            #             updated_when = timestamp
            #             user_id = activity['user_id']
            #             check_ignore = True
            #             for user_scrapers in users_scrapers:
            #                 if user_id == user_scrapers['id']:
            #                     if orgname in user_scrapers['scrapers']:
            #                         check_ignore = False
            #                     break
            #             if check_ignore:
            #                 if user_id in ignore_users:
            #                     continue
            #                 username = users.get(user_id)
            #                 if username is None:
            #                     user = User.read_from_hdx(user_id)
            #                     username = get_user_name(user)
            #                     users[user_id] = username
            #                 updated_by = username
            #             found = True
            #             break
            #     if not found:
            #         continue
            row['dataset title'] = dataset['title']
            row['dataset id'] = dataset['id']
            row['dataset url'] = dataset.get_hdx_url()
            row['org name'] = orgname
            row['org id'] = dataset['organization']['id']
            row['created'] = metadata_created_str
            row['new or updated'] = new_or_updated
            row['updated when'] = updated_when
            row['updated by'] = updated_by
            # Project the row dict onto the sheet's header order
            rows.append([row.get(key, '') for key in keys])
            count += 1
        logger.info('%s: %d\t%s' % (crisis, count, search_string))
    sheet.clear()
    sheet.update_values('A1', rows)
    logger.info('Longest activities: %d' % largest_activities)
# 'Botswana': 'Southern Africa ex SA', 'Malawi': 'Southern Africa ex SA', 'Mozambique': 'Southern Africa ex SA', 'Zimbabwe': 'Southern Africa ex SA', # 'Cameroon': 'West Africa', 'Equatorial Guinea': 'West Africa', 'Gabon': 'West Africa', 'Mali': 'West Africa', 'Nigeria': 'West Africa', 'Senegal': 'West Africa' } iso32loc = { Country.get_iso3_country_code_fuzzy(c)[0]: loc for (c, loc) in country2loc.items() if Country.get_iso3_country_code_fuzzy(c)[0] } if '__main__' == __name__: import argparse parser = argparse.ArgumentParser() parser.add_argument('--drm_tab', required=True, type=str) parser.add_argument('--input_data', required=True, type=str) parser.add_argument('--output_data', required=True, type=str) params = parser.parse_args() df = pd.read_csv(params.drm_tab, index_col=0, header=0, sep='\t')
def generate_map(data, country, location, html, tree=None, data_sep='\t', id_index=0, colours=None):
    """Render an HTML world map colouring each sampled country by its location group.

    Args:
        data: path to the annotation table
        country: name of the column holding country names
        location: name of the column holding the location/grouping value
        html: output path for the rendered HTML map
        tree: optional tree file; if given, restrict rows to the tree's tip names
        data_sep: column separator of the annotation table (default tab)
        id_index: index of the id column used as the table index
        colours: optional colour specification; auto-generated when not given

    Raises:
        ValueError: if the country or location column is missing from the table
    """
    df = pd.read_csv(data, sep=data_sep, header=0, index_col=id_index)
    if country not in df.columns:
        raise ValueError(
            'The country column {} not found among the annotation columns: {}.'
            .format(country, df.columns))
    if location not in df.columns:
        raise ValueError(
            'The location column {} not found among the annotation columns: {}.'
            .format(location, df.columns))
    # Keep the first non-null location seen for each country
    df.sort_values(by=[location], inplace=True, na_position='last')
    ddf = df.drop_duplicates(subset=[country], inplace=False, keep='first')
    country2location = {
        c: l
        for c, l in zip(ddf[country], ddf[location])
        if not pd.isnull(c) and not pd.isnull(l)
    }
    if tree:
        # Restrict to samples that appear as tips in the tree.
        # Bug fix: the np.str alias was removed in NumPy >= 1.24 (deprecated
        # since 1.20); the builtin str is the documented replacement.
        df = df[np.in1d(df.index.astype(str),
                        [_.name for _ in read_tree(tree)])]
    unique_countries = {_ for _ in df[country].unique() if not pd.isnull(_)}
    if ISO_EXISTS:
        # Map each sampled country name to its ISO2 code via a fuzzy ISO3 lookup
        country2iso = {
            _: Country.get_iso2_from_iso3(iso)
            for (_, iso) in ((_, Country.get_iso3_country_code_fuzzy(_)[0])
                             for _ in country2location.keys())
            if iso and _ in unique_countries
        }
    else:
        # Without the ISO library, fall back to escaped country names as keys
        country2iso = {
            _: escape(_)
            for _ in country2location.keys() if _ in unique_countries
        }
    iso2num = {
        iso: len(df[df[country] == c])
        for c, iso in country2iso.items()
    }
    iso2loc = {iso: country2location[c] for c, iso in country2iso.items()}
    iso2loc_num = {
        iso: len(df[df[location] == loc])
        for iso, loc in iso2loc.items()
    }
    iso2tooltip = {
        iso: escape('{}: {} samples (out of {} in {})'.format(
            c, iso2num[iso], iso2loc_num[iso], iso2loc[iso]))
        for (c, iso) in country2iso.items()
    }
    locations = sorted([_ for _ in df[location].unique() if not pd.isnull(_)])
    num_unique_values = len(locations)
    if colours:
        colours = parse_colours(colours, locations)
    else:
        colours = get_enough_colours(num_unique_values)
    # One colour per location group; each country inherits its group's colour
    iso2colour = {
        iso: colours[locations.index(loc)]
        for iso, loc in iso2loc.items()
    }
    env = Environment(loader=PackageLoader('pastml'))
    template = env.get_template('geo_map.html')
    page = template.render(iso2colour=iso2colour, colours=colours,
                           iso2tooltip=iso2tooltip)
    os.makedirs(os.path.abspath(os.path.dirname(html)), exist_ok=True)
    with open(html, 'w+') as fp:
        fp.write(page)
def get_camp_non_camp_populations(noncamp_types, camp_types, camp_overrides, datasets, downloader):
    """Read the latest UNHCR displacement spreadsheet and bucket camp populations by type.

    Args:
        noncamp_types (str): comma-separated accommodation types counted as non-camp
        camp_types (str): comma-separated accommodation types counted as camp
        camp_overrides (dict): per-camp overrides keyed by camp name under
            'Accommodation Type', 'Population' and 'Country'
        datasets (list): HDX datasets searched for the latest 'displacement' dataset
        downloader (Download): used to read the dataset's first resource (sheet 'Tab15')

    Returns:
        tuple: (all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded)

    Raises:
        ValueError: if no dataset with 'displacement' in its title is found
    """
    noncamp_types = noncamp_types.split(',')
    camp_types = camp_types.split(',')
    # Pick the most recently dated dataset whose title mentions displacement
    dataset_unhcr = None
    latest_date = None
    for dataset in datasets:
        if 'displacement' in dataset['title'].lower():
            date = dataset.get_dataset_date_as_datetime()
            if latest_date is None or date > latest_date:
                dataset_unhcr = dataset
                latest_date = date
    if dataset_unhcr is None:
        raise ValueError('No UNHCR dataset found!')
    url = dataset_unhcr.get_resources()[0]['url']
    country_ind = 0  # assume first column contains country
    iso3 = None
    row = None
    prev_row = None
    all_camps_per_country = dict()
    unhcr_non_camp = dict()
    unhcr_camp = dict()
    unhcr_camp_excluded = dict()
    rowiter = downloader.get_tabular_rows(url, sheet='Tab15')
    # Advance to the first data row (first row whose country column resolves
    # to an ISO3 code); the row before it is taken to be the header row
    for row in rowiter:
        country = row[country_ind]
        iso3 = Country.get_iso3_country_code(country)
        if iso3 is not None:
            break
        prev_row = row
    # Locate the accommodation, location and population columns by inspecting
    # the header row (prev_row) alongside the first data row (row). The first
    # remaining column whose value parses as int is assumed to be population.
    accommodation_ind = None
    location_ind = None
    population_ind = None
    population = None
    for i, text in enumerate(prev_row):
        header = text.lower()
        value = row[i]
        if 'accommodation' in header:
            accommodation_ind = i
        elif 'location' in header and len(value) > 1:
            location_ind = i
        else:
            try:
                population = int(value)
                population_ind = i
                break
            except ValueError:
                pass
    # NOTE(review): if any of the three columns is not found, the indices stay
    # None and the row[...] lookups below would fail — presumably the sheet
    # layout guarantees they exist; verify against the source spreadsheet.
    campname = row[location_ind]

    def get_accommodation_type(name):
        # Per-camp override wins over the value in the sheet
        # (closes over the current `row` and the column indices above)
        accom_type = camp_overrides['Accommodation Type'].get(name)
        if accom_type is None:
            accom_type = row[accommodation_ind]
        else:
            logger.info('Overriding accommodation type to %s for %s' %
                        (accom_type, name))
        return accom_type.lower()

    accommodation_type = get_accommodation_type(campname)

    def match_camp_types(name, accom_type, pop, iso):
        # Classify one camp row into the camp/non-camp/excluded accumulators
        # (mutates the dicts defined in the enclosing scope)
        if check_name_dispersed(name):
            # Names flagged as dispersed populations count as the first non-camp type
            accom_type = noncamp_types[0]
        found_camp_type = None
        for camp_type in camp_types:
            if camp_type in accom_type:
                found_camp_type = camp_type
                unhcr_camp[name] = pop, iso, found_camp_type
                break
        for noncamp_type in noncamp_types:
            if noncamp_type in accom_type:
                found_camp_type = noncamp_type
                append_value(unhcr_non_camp, iso, found_camp_type, name, pop)
                break
        if found_camp_type is None:
            # Unrecognized accommodation type: track it as excluded
            append_value(unhcr_camp_excluded, iso, accom_type, name, pop)
            append_value(all_camps_per_country, iso, accom_type, name, pop)
        else:
            append_value(all_camps_per_country, iso, found_camp_type, name, pop)

    # Process the first data row found above, then the rest of the sheet
    match_camp_types(campname, accommodation_type, population, iso3)
    for row in rowiter:
        country = row[country_ind]
        if not country:
            continue
        if 'NOTES' in country.upper():
            # Footer section reached: stop reading data rows
            break
        iso3, match = Country.get_iso3_country_code_fuzzy(country)
        if iso3 is None:
            logger.warning('Country %s could not be matched to ISO3 code!' %
                           country)
            continue
        else:
            if match is False:
                # Log when the match was fuzzy rather than exact
                logger.info('Matched %s to ISO3: %s!' % (country, iso3))
        campname = row[location_ind]
        accommodation_type = get_accommodation_type(campname)
        population = int(row[population_ind])
        match_camp_types(campname, accommodation_type, population, iso3)
    # Finally add camps that only exist in the overrides
    for campname in sorted(camp_overrides['Population']):
        if campname in unhcr_camp:
            continue
        iso3 = camp_overrides['Country'][campname]
        accommodation_type = camp_overrides['Accommodation Type'][
            campname].lower()
        population = camp_overrides['Population'][campname]
        logger.info('Adding camp from override: %s (%s, %s): %d' %
                    (campname, iso3, accommodation_type, population))
        match_camp_types(campname, accommodation_type, population, iso3)
    return all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded
def get_iso3(name):
    """Resolve a country name to its ISO3 code, raising ValueError when no match exists.

    Logs an informational message whenever the lookup had to fall back to
    fuzzy (non-exact) matching.
    """
    code, exact = Country.get_iso3_country_code_fuzzy(name, exception=ValueError)
    if exact:
        return code
    logger.info('Country %s matched to ISO3: %s!' % (name, code))
    return code
def get_iso3(_):
    """Return the fuzzy-matched ISO3 code for a name, or the name itself when no match is found."""
    code = Country.get_iso3_country_code_fuzzy(_)[0]
    if not code:
        return _
    return code
def generate_dataset(dataset_id, configuration, downloader, output_failures=False):
    """Build an HDX Dataset from a survey catalog entry's metadata.

    Args:
        dataset_id: id of the survey in the remote catalog
        configuration (dict): url templates (base_url, metadata_url, ui_url,
            auth_url, documentation_url)
        downloader (Download): used to fetch the survey metadata
        output_failures (bool): if True, record invalid-country errors in the
            module-level `failures` list and return None instead of raising

    Returns:
        Dataset|None: the constructed dataset, or None on a recorded failure
    """
    metadata_url = configuration["metadata_url"] % dataset_id
    response = downloader.download(
        f"{configuration['base_url']}{metadata_url}")
    # NOTE: shadows the stdlib `json` module name within this function
    json = response.json()
    study_desc = json["study_desc"]
    title_statement = study_desc["title_statement"]
    title = title_statement["title"]
    logger.info(f"Creating dataset: {title}")
    study_info = study_desc["study_info"]
    data_collection = study_desc["method"]["data_collection"]
    sources = [x["name"] for x in study_desc["authoring_entity"]]
    # Assemble the methodology text from whichever metadata fields are present
    methodology = list()
    data_kind = study_info.get("data_kind")
    if data_kind is not None:
        methodology.append(f"Kind of Data: {data_kind} \n")
    unit_analysis = study_info.get("universe")
    if unit_analysis is None:
        unit_analysis = study_info.get("analysis_unit")
    if unit_analysis is not None:
        methodology.append(f"Unit of Analysis: {unit_analysis} \n")
    sampling = data_collection.get("sampling_procedure")
    if sampling is not None:
        methodology.append(f"Sampling Procedure: {sampling} \n")
    collection = data_collection.get("coll_mode")
    if collection is not None:
        methodology.append(f"Data Collection Mode: {collection} \n")
    dataset_name = slugify(title_statement["idno"])
    # Collect country codes: prefer the given abbreviation, else fuzzy-match the name
    countryiso3s = set()
    for nation in study_info["nation"]:
        countryiso3 = nation["abbreviation"]
        if not countryiso3:
            countryname = nation["name"]
            if countryname:
                countryiso3, _ = Country.get_iso3_country_code_fuzzy(
                    countryname)
        if countryiso3:
            countryiso3s.add(countryiso3)
    # Single-country surveys get the country name prefixed to the title
    if len(countryiso3s) == 1:
        countryname = Country.get_country_name_from_iso3(min(countryiso3s))
        title = f"{countryname} - {title}"
    dataset = Dataset({
        "name": dataset_name,
        "title": title,
        "notes": study_info["abstract"],
        "dataset_source": ", ".join(sources),
        "methodology": "Other",
        "methodology_other": "".join(methodology),
    })
    dataset.set_maintainer("ac47b0c8-548b-4c37-a685-7377e75aad55")
    dataset.set_organization("abf4ca86-8e69-40b1-92f7-71509992be88")
    dataset.set_expected_update_frequency("Never")
    dataset.set_subnational(True)
    if output_failures:
        # Record the invalid country in the module-level `failures` list and
        # skip the dataset instead of letting the exception propagate
        try:
            dataset.add_country_locations(countryiso3s)
        except HDXError:
            ui_url = configuration["ui_url"] % dataset_id
            url = f"{configuration['base_url']}{ui_url}"
            failures.append(
                f"Invalid country id {countryiso3s} in dataset {url}!")
            return None
    else:
        dataset.add_country_locations(countryiso3s)
    tags = list()

    def add_tags(inwords, key):
        # Split topic/keyword strings on common separators (',', '/', 'and',
        # '&', 'other') and append each cleaned word to the enclosing `tags`
        for inword in inwords:
            inword = inword[key].strip().lower()
            if "," in inword:
                words = inword.split(",")
            elif "/" in inword:
                words = inword.split("/")
            else:
                words = [inword]
            newwords = list()
            for innerword in words:
                if "and" in innerword:
                    newwords.extend(innerword.split(" and "))
                elif "&" in innerword:
                    newwords.extend(innerword.split(" & "))
                elif "other" in innerword:
                    newwords.extend(innerword.split("other"))
                else:
                    newwords.append(innerword)
            for word in newwords:
                word = word.strip()
                if word:
                    tags.append(word.strip())

    add_tags(study_info["topics"], "topic")
    add_tags(study_info.get("keywords", list()), "keyword")
    dataset.add_tags(tags)
    dataset.clean_tags()
    # Dataset date range comes from the first collection-dates entry
    coll_dates = study_info["coll_dates"][0]
    startdate, _ = parse_date_range(coll_dates["start"])
    _, enddate = parse_date_range(coll_dates["end"])
    dataset.set_date_of_dataset(startdate, enddate)
    # External "request access" resource plus the codebook pdf
    auth_url = configuration["auth_url"] % dataset_id
    resourcedata = {
        "name": title,
        "description": 'Clicking "Download" leads outside HDX where you can request access to the data in csv, xlsx & dta formats',
        "url": f"{configuration['base_url']}{auth_url}",
        "format": "web app",
    }
    dataset.add_update_resource(resourcedata)
    documentation_url = configuration["documentation_url"] % dataset_id
    resourcedata = {
        "name": "Codebook",
        "description": "Contains information about the dataset's metadata and data",
        "url": f"{configuration['base_url']}{documentation_url}",
        "format": "pdf",
    }
    dataset.add_update_resource(resourcedata)
    return dataset
# #30DayMapChallenge # Día 26: Nueva Herramienta -> Python # Fragile States Index # Datos: https://fragilestatesindex.org/excel/ # Autora: Stephanie Orellana (@sporella) import pandas as pd import geopandas as gp import matplotlib.pyplot as plt from hdx.location.country import Country from pyproj import CRS df = pd.read_excel("data/fsi-2020.xlsx") df['iso_a3'] = df.apply( lambda row: Country.get_iso3_country_code_fuzzy(row["Country"])[0], axis=1) world = gp.read_file(gp.datasets.get_path('naturalearth_lowres')) world_dat = world.merge(df, on='iso_a3', how="left") gdf = world_dat.to_crs(CRS("ESRI:54009")) plt.rcParams.update({ "text.color": "black", "axes.facecolor": "black", "axes.edgecolor": "black", "axes.labelcolor": "white", "xtick.color": "white", "ytick.color": "white", "grid.color": "lightgray", "figure.facecolor": "black", "figure.edgecolor": "black", "savefig.facecolor": "black",