def test_get_iso2_from_iso3(self):
    """ISO3->ISO2: known code maps, unknown yields None or raises if an exception is supplied."""
    mapped = Country.get_iso2_from_iso3('jpn', use_live=False)
    assert mapped == 'JP'
    unmapped = Country.get_iso2_from_iso3('abc', use_live=False)
    assert unmapped is None
    with pytest.raises(LocationError):
        Country.get_iso2_from_iso3('abc', use_live=False, exception=LocationError)
def convert_pcode_length(self, countryiso3, adm1_pcode, scrapername):
    """Normalise an admin-one p-code to the country's standard length and match it internally.

    Returns the p-code unchanged if already known. Otherwise converts between
    4/5/6-character p-code forms (swapping ISO2/ISO3 country prefixes and
    padding with '0' as needed) and returns the converted code if it matches
    an internal p-code, recording the match for logging; returns None otherwise.
    """
    if adm1_pcode in self.pcodes:
        return adm1_pcode
    actual_length = len(adm1_pcode)
    expected_length = self.pcode_lengths.get(countryiso3)
    if not expected_length:
        # No standard length known for this country
        return None
    if actual_length == expected_length or not 4 <= actual_length <= 6:
        return None
    if expected_length == 4:
        # 5/6-char code -> ISO2 prefix + last two characters
        candidate = f'{Country.get_iso2_from_iso3(adm1_pcode[:3])}{adm1_pcode[-2:]}'
    elif expected_length == 5:
        if actual_length == 4:
            candidate = f'{adm1_pcode[:2]}0{adm1_pcode[-2:]}'
        else:
            candidate = f'{Country.get_iso2_from_iso3(adm1_pcode[:3])}{adm1_pcode[-3:]}'
    elif expected_length == 6:
        if actual_length == 4:
            candidate = f'{Country.get_iso3_from_iso2(adm1_pcode[:2])}0{adm1_pcode[-2:]}'
        else:
            candidate = f'{Country.get_iso3_from_iso2(adm1_pcode[:2])}{adm1_pcode[-3:]}'
    else:
        candidate = None
    if candidate in self.pcodes:
        # Record the successful conversion (keyed on the original input p-code)
        self.matches.add((scrapername, countryiso3, adm1_pcode,
                          self.pcode_to_name[candidate],
                          'pcode length conversion'))
        return candidate
    return None
def get_WHO_data(config, country_iso3, hxlize=False, smooth_data=False, n_days_smoothing=14):
    """Download the WHO COVID-19 CSV from HDX and return the rows for one country.

    :param config: configuration object providing INPUT_DIR, WHO_DIR, WHO_HDX_ADDRESS, WHO_FILENAME
    :param country_iso3: ISO3 code of the country to filter on (matched via its ISO2 code)
    :param hxlize: rename columns using HXL_DICT when True
    :param smooth_data: apply a rolling mean to case/death columns when True
    :param n_days_smoothing: window size for the rolling mean
    :return: a pandas DataFrame of WHO data for the country
    """
    logger.info('Downloading WHO data from HDX')
    who_dir = os.path.join(config.INPUT_DIR, config.WHO_DIR)
    Path(who_dir).mkdir(parents=True, exist_ok=True)
    downloaded = query_api(config.WHO_HDX_ADDRESS, who_dir, resource_format='CSV')
    download_filename = next(iter(downloaded.values()))
    final_filepath = os.path.join(who_dir, config.WHO_FILENAME)
    os.rename(os.path.join(who_dir, download_filename), final_filepath)
    # Select the country's rows via its ISO2 code
    logger.info(f'Returning WHO data for {country_iso3}')
    df_WHO = pd.read_csv(final_filepath)
    iso2 = Country.get_iso2_from_iso3(country_iso3)
    df_WHO = df_WHO[df_WHO['Country_code'] == iso2]
    # Reporting changes can produce sudden bumps that distort the model,
    # so optionally smooth the series with a rolling mean before use.
    if smooth_data:
        df_WHO = df_WHO.sort_values(by='Date_reported', ascending=True)
        smoothed_columns = ('New_cases', 'Cumulative_cases', 'New_deaths', 'Cumulative_deaths')
        for column_name in smoothed_columns:
            df_WHO[column_name] = df_WHO[column_name].rolling(window=n_days_smoothing).mean()
    if hxlize:
        df_WHO = df_WHO.rename(columns=HXL_DICT)
    return df_WHO
def convert_pcode_length(self, countryiso3, pcode, scrapername=None):
    # type: (str, str, Optional[str]) -> Optional[str]
    """Standardise pcode length by country and match to an internal pcode

    Args:
        countryiso3 (str): Iso3 country code
        pcode (str): P code for admin one
        scrapername (Optional[str]): Name of scraper for logging purposes. Defaults to None (don't log).

    Returns:
        Optional[str]: Matched P code or None if no match
    """
    if pcode in self.pcodes:
        return pcode
    expected = self.pcode_lengths.get(countryiso3)
    if not expected:
        # No standard pcode length known for this country
        return None
    actual = len(pcode)
    if actual == expected or not 4 <= actual <= 6:
        return None
    if expected == 4:
        converted = f'{Country.get_iso2_from_iso3(pcode[:3])}{pcode[-2:]}'
    elif expected == 5:
        if actual == 4:
            converted = f'{pcode[:2]}0{pcode[-2:]}'
        else:
            converted = f'{Country.get_iso2_from_iso3(pcode[:3])}{pcode[-3:]}'
    elif expected == 6:
        if actual == 4:
            converted = f'{Country.get_iso3_from_iso2(pcode[:2])}0{pcode[-2:]}'
        else:
            converted = f'{Country.get_iso3_from_iso2(pcode[:2])}{pcode[-3:]}'
    else:
        converted = None
    if converted not in self.pcodes:
        return None
    if scrapername:
        # Only log the match when a scraper name was supplied
        self.matches.add((scrapername, countryiso3, converted,
                          self.pcode_to_name[converted],
                          'pcode length conversion'))
    return converted
def get_ipc(configuration, admininfo, downloader, scrapers=None):
    """Scrape IPC P3+ food insecurity percentages per admin-one unit.

    Skips all work (returning three empty lists) when a scrapers filter is
    given and does not match this scraper's name. Per p-code, multiple area
    percentages are combined into a population-weighted average.

    Returns: (headers+hxltags, [values-by-pcode], [source tuple])
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    percentages_by_pcode = dict()
    populations_by_pcode = dict()
    for countryiso3 in admininfo.countryiso3s:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, countryiso2)
        if not data:
            continue
        for row in data:
            country = row['Country']
            if adm1_names:
                # Country field holds the admin-one name in this layout
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = admininfo.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            population = row['Current Phase P3+ #']
            if population:
                dict_of_lists_add(populations_by_pcode, pcode, population)
            percentage = row['Current Phase P3+ %']
            if percentage:
                dict_of_lists_add(percentages_by_pcode, pcode, percentage)
    # Collapse each p-code's list of percentages into one fraction string,
    # weighting by population when multiple areas map to the same p-code.
    for pcode, percentages in percentages_by_pcode.items():
        if len(percentages) == 1:
            percentages_by_pcode[pcode] = get_fraction_str(percentages[0])
            continue
        numerator = 0
        denominator = 0
        for population, percentage in zip(populations_by_pcode[pcode], percentages):
            numerator += population * percentage
            denominator += population
        percentages_by_pcode[pcode] = get_fraction_str(numerator, denominator)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset)
    hxltag = '#affected+food+ipc+p3+pct'
    return [['FoodInsecurityIPCP3+'], [hxltag]], [percentages_by_pcode], \
        [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url())]
def get_travel_restrictions(airports):
    """
    Scrapes trackcorona.live/api for covid-19 data on cities

    :param airports: a pandas DataFrame of airports data (ident, type, name, elevation,
        continent, iso country...); currently unused but kept for interface compatibility
    :return: a pandas DataFrame of iso_regions Covid-19 data (latitude, longitude,
        confirmed cases, dead, recovered) with locations replaced by ISO2 country codes
        in an 'iso_country' column
    """
    travel = pd.DataFrame(
        requests.get(CFG.TRAVEL_URL).json()[CFG.DATA_COLUMN_NAME])
    # FIX: the fuzzy country lookup was previously invoked twice per location
    # (once for the condition, once for the value); resolve it a single time.
    iso_countries_codes = []
    for location in travel['location']:
        iso3 = Country.get_iso3_country_code_fuzzy(location)[CFG.FIRST_ITEM]
        # Unresolvable locations become empty strings, as before
        iso_countries_codes.append(Country.get_iso2_from_iso3(iso3) if iso3 else '')
    travel['location'] = iso_countries_codes
    travel = travel.rename(columns={'location': 'iso_country'})
    return travel
def get_ipc(configuration, today, gho_countries, adminone, downloader, scrapers=None):
    """Scrape IPC phase populations nationally and per admin-one unit.

    Skips all work (returning five empty lists) when a scrapers filter is given
    and does not match this scraper's name. For each country, national figures
    for phases 3/4/5/P3+ plus the analysed population and analysis period are
    collected; subnational per-phase populations are summed per p-code.

    Returns: (national headers+hxltags, national outputs,
              subnational headers+hxltags, subnational outputs, source tuples)
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    phases = ['3', '4', '5', 'P3+']
    projections = ['Current', 'First Projection', 'Second Projection']
    national_populations = {phase: dict() for phase in phases}
    national_analysed = dict()
    national_period = dict()
    national_start = dict()
    national_end = dict()
    subnational_populations = {phase: dict() for phase in phases}
    for countryiso3 in gho_countries:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, today, countryiso2)
        if not data:
            continue
        # First row carries the national figures; the analysis period chooses
        # which projection's columns to read.
        row = data[0]
        analysis_period, start, end = get_period(today, row, projections)
        for phase in phases:
            national_populations[phase][countryiso3] = row[
                f'{analysis_period} Phase {phase} #']
        national_analysed[countryiso3] = row['Current Population Analysed #']
        national_period[countryiso3] = analysis_period
        national_start[countryiso3] = start
        national_end[countryiso3] = end
        for row in data[1:]:
            country = row['Country']
            if adm1_names:
                # Country field holds the admin-one name in this layout
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = adminone.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            for phase in phases:
                population = row[f'{analysis_period} Phase {phase} #']
                if population:
                    dict_of_lists_add(subnational_populations[phase], pcode,
                                      population)
    for phase in phases:
        subnational_population = subnational_populations[phase]
        for pcode in subnational_population:
            populations = subnational_population[pcode]
            if len(populations) == 1:
                # Single entry is kept as-is
                subnational_population[pcode] = populations[0]
            else:
                # FIX: was a manual accumulation loop with an unused
                # enumerate index; sum() is equivalent and clearer.
                subnational_population[pcode] = sum(populations)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset, today=today)
    headers = [f'FoodInsecurityIPC{phase}' for phase in phases]
    headers.append('FoodInsecurityIPCAnalysedNum')
    headers.append('FoodInsecurityIPCAnalysisPeriod')
    headers.append('FoodInsecurityIPCAnalysisPeriodStart')
    headers.append('FoodInsecurityIPCAnalysisPeriodEnd')
    hxltags = [f'#affected+food+ipc+p{phase}+num' for phase in phases[:-1]]
    hxltags.append('#affected+food+ipc+p3plus+num')
    hxltags.append('#affected+food+ipc+analysed+num')
    hxltags.append('#date+ipc+period')
    hxltags.append('#date+ipc+start')
    hxltags.append('#date+ipc+end')
    national_outputs = [national_populations[phase] for phase in phases]
    national_outputs.append(national_analysed)
    national_outputs.append(national_period)
    national_outputs.append(national_start)
    national_outputs.append(national_end)
    # Subnational outputs omit the four national-only trailing columns
    subnational_outputs = [subnational_populations[phase] for phase in phases]
    return [headers, hxltags], national_outputs, [headers[:-4], hxltags[:-4]], subnational_outputs, \
        [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url()) for hxltag in hxltags]
def get_ipc(configuration, admininfo, downloader, scrapers=None):
    """Scrape IPC phase percentages nationally and per admin-one unit.

    Skips all work (returning five empty lists) when a scrapers filter is
    given and does not match this scraper's name. National percentages for
    phases 3/4/5/P3+ and the analysed-population share come from the first
    data row per country; subnational percentages are combined per p-code
    into a population-weighted average.

    Returns: (national headers+hxltags, national outputs,
              subnational headers+hxltags, subnational outputs, source tuples)
    """
    # Scraper-name filter: runs only when unfiltered or when a filter entry
    # is a substring of this function's name.
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    phases = ['3', '4', '5', 'P3+']
    national_phases = {phase: dict() for phase in phases}
    national_analysed = dict()
    subnational_phases = {phase: dict() for phase in phases}
    subnational_populations = {phase: dict() for phase in phases}
    for countryiso3 in admininfo.countryiso3s:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, countryiso2)
        if not data:
            continue
        # First row carries the national figures
        row = data[0]
        for phase in phases:
            national_phases[phase][countryiso3] = row[
                f'Current Phase {phase} %']
        national_analysed[
            countryiso3] = f'{row["Current Population Analysed % of total county Pop"]:.03f}'
        # Remaining rows are subnational (admin-one) figures
        for row in data[1:]:
            country = row['Country']
            if adm1_names:
                # Country field holds the admin-one name in this layout
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = admininfo.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            # Collect both % and # per phase: populations are later used as
            # weights when several areas map to one p-code.
            for phase in phases:
                population = row[f'Current Phase {phase} #']
                if population:
                    dict_of_lists_add(subnational_populations[phase], pcode,
                                      population)
                percentage = row[f'Current Phase {phase} %']
                if percentage:
                    dict_of_lists_add(subnational_phases[phase], pcode,
                                      percentage)
    # Collapse each p-code's percentage list to a single fraction string,
    # population-weighting when more than one area contributed.
    for phase in phases:
        subnational_phase = subnational_phases[phase]
        for pcode in subnational_phase:
            percentages = subnational_phase[pcode]
            if len(percentages) == 1:
                subnational_phase[pcode] = get_fraction_str(percentages[0])
            else:
                # NOTE(review): assumes populations and percentages were
                # appended for the same rows, so the lists align by index.
                populations = subnational_populations[phase][pcode]
                numerator = 0
                denominator = 0
                for i, percentage in enumerate(percentages):
                    population = populations[i]
                    numerator += population * percentage
                    denominator += population
                subnational_phase[pcode] = get_fraction_str(
                    numerator, denominator)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset)
    headers = [f'FoodInsecurityIPC{phase}' for phase in phases]
    headers.append('FoodInsecurityIPCAnalysed')
    hxltags = [f'#affected+food+ipc+p{phase}+pct' for phase in phases[:-1]]
    hxltags.append('#affected+food+ipc+p3plus+pct')
    hxltags.append('#affected+food+ipc+analysed+pct')
    national_outputs = [national_phases[phase] for phase in phases]
    national_outputs.append(national_analysed)
    # Subnational outputs omit the national-only analysed column
    subnational_outputs = [subnational_phases[phase] for phase in phases]
    return [headers, hxltags], national_outputs, [headers[:-1], hxltags[:-1]], subnational_outputs, \
        [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url()) for hxltag in hxltags]
def generate_map(data, country, location, html, tree=None, data_sep='\t', id_index=0, colours=None):
    """Render an HTML world map colouring each sampled country by its location group.

    :param data: path to the annotation table readable by pandas
    :param country: name of the column holding country names
    :param location: name of the column holding the location grouping
    :param html: output path for the rendered HTML page
    :param tree: optional tree file; if given, only samples named in the tree are counted
    :param data_sep: field separator of the annotation table
    :param id_index: index of the id column
    :param colours: optional colours, parsed against the sorted locations;
        auto-generated when not supplied
    :raises ValueError: if the country or location column is missing
    """
    df = pd.read_csv(data, sep=data_sep, header=0, index_col=id_index)
    if country not in df.columns:
        raise ValueError(
            'The country column {} not found among the annotation columns: {}.'
            .format(country, df.columns))
    if location not in df.columns:
        raise ValueError(
            'The location column {} not found among the annotation columns: {}.'
            .format(location, df.columns))
    df.sort_values(by=[location], inplace=True, na_position='last')
    # First non-null location per country defines the country->location map
    ddf = df.drop_duplicates(subset=[country], inplace=False, keep='first')
    country2location = {
        c: l
        for c, l in zip(ddf[country], ddf[location])
        if not pd.isnull(c) and not pd.isnull(l)
    }
    if tree:
        # Restrict to samples present in the tree.
        # FIX: np.str (deprecated NumPy 1.20, removed 1.24) replaced by builtin str.
        df = df[np.in1d(df.index.astype(str),
                        [_.name for _ in read_tree(tree)])]
    unique_countries = {_ for _ in df[country].unique() if not pd.isnull(_)}
    if ISO_EXISTS:
        # Resolve country names to ISO2 codes via fuzzy ISO3 lookup
        country2iso = {
            _: Country.get_iso2_from_iso3(iso)
            for (_, iso) in ((_, Country.get_iso3_country_code_fuzzy(_)[0])
                             for _ in country2location.keys())
            if iso and _ in unique_countries
        }
    else:
        country2iso = {
            _: escape(_)
            for _ in country2location.keys() if _ in unique_countries
        }
    iso2num = {
        iso: len(df[df[country] == c])
        for c, iso in country2iso.items()
    }
    iso2loc = {iso: country2location[c] for c, iso in country2iso.items()}
    iso2loc_num = {
        iso: len(df[df[location] == loc])
        for iso, loc in iso2loc.items()
    }
    iso2tooltip = {
        iso: escape('{}: {} samples (out of {} in {})'.format(
            c, iso2num[iso], iso2loc_num[iso], iso2loc[iso]))
        for (c, iso) in country2iso.items()
    }
    locations = sorted([_ for _ in df[location].unique() if not pd.isnull(_)])
    num_unique_values = len(locations)
    if colours:
        colours = parse_colours(colours, locations)
    else:
        colours = get_enough_colours(num_unique_values)
    # One colour per location group, shared by all of its countries
    iso2colour = {
        iso: colours[locations.index(loc)]
        for iso, loc in iso2loc.items()
    }
    env = Environment(loader=PackageLoader('pastml'))
    template = env.get_template('geo_map.html')
    page = template.render(iso2colour=iso2colour, colours=colours,
                           iso2tooltip=iso2tooltip)
    os.makedirs(os.path.abspath(os.path.dirname(html)), exist_ok=True)
    with open(html, 'w+') as fp:
        fp.write(page)