Example #1
0
 def test_get_iso2_from_iso3(self):
     """ISO3-to-ISO2 lookup: known code maps, unknown code yields None or raises."""
     # A valid ISO3 code resolves to its two-letter equivalent
     result = Country.get_iso2_from_iso3('jpn', use_live=False)
     assert result == 'JP'
     # An unknown code silently yields None by default
     missing = Country.get_iso2_from_iso3('abc', use_live=False)
     assert missing is None
     # ...but raises when an exception class is supplied
     with pytest.raises(LocationError):
         Country.get_iso2_from_iso3('abc',
                                    use_live=False,
                                    exception=LocationError)
Example #2
0
 def convert_pcode_length(self, countryiso3, adm1_pcode, scrapername):
     """Normalise an admin-1 pcode to the country's expected length and match it.

     If the pcode is already known it is returned as-is. Otherwise the prefix
     is re-encoded (ISO2 <-> ISO3) and/or zero-padded to reach the country's
     pcode length, and the result is checked against the known pcodes.
     Successful conversions are recorded in self.matches for logging.
     """
     if adm1_pcode in self.pcodes:
         return adm1_pcode
     given_length = len(adm1_pcode)
     expected_length = self.pcode_lengths.get(countryiso3)
     if not expected_length:
         return None
     # Only lengths 4-6 that differ from the country's convention are fixable
     if given_length == expected_length or not 4 <= given_length <= 6:
         return None
     converted = None
     if expected_length == 4:
         # ISO3-prefixed pcode shortened to ISO2 prefix + 2-digit suffix
         converted = f'{Country.get_iso2_from_iso3(adm1_pcode[:3])}{adm1_pcode[-2:]}'
     elif expected_length == 5:
         if given_length == 4:
             # Pad the numeric part with a leading zero
             converted = f'{adm1_pcode[:2]}0{adm1_pcode[-2:]}'
         else:
             converted = f'{Country.get_iso2_from_iso3(adm1_pcode[:3])}{adm1_pcode[-3:]}'
     elif expected_length == 6:
         if given_length == 4:
             converted = f'{Country.get_iso3_from_iso2(adm1_pcode[:2])}0{adm1_pcode[-2:]}'
         else:
             converted = f'{Country.get_iso3_from_iso2(adm1_pcode[:2])}{adm1_pcode[-3:]}'
     if converted in self.pcodes:
         # Record the match against the ORIGINAL pcode for audit purposes
         self.matches.add((scrapername, countryiso3, adm1_pcode,
                           self.pcode_to_name[converted],
                           'pcode length conversion'))
         return converted
     return None
def get_WHO_data(config,
                 country_iso3,
                 hxlize=False,
                 smooth_data=False,
                 n_days_smoothing=14):
    """Download the latest WHO COVID-19 CSV from HDX and return one country's rows.

    Args:
        config: Configuration object providing INPUT_DIR, WHO_DIR,
            WHO_HDX_ADDRESS and WHO_FILENAME.
        country_iso3 (str): ISO3 code of the country to filter on
            (converted to ISO2 to match WHO's Country_code column).
        hxlize (bool): Rename columns to HXL tags via HXL_DICT. Defaults to False.
        smooth_data (bool): Apply a rolling mean to case/death columns.
            Defaults to False.
        n_days_smoothing (int): Rolling-mean window in days. Defaults to 14.

    Returns:
        pandas.DataFrame: WHO data filtered to the given country.
    """
    # Download the file and move it to a local directory
    logger.info('Downloading WHO data from HDX')
    WHO_dir = os.path.join(config.INPUT_DIR, config.WHO_DIR)
    Path(WHO_dir).mkdir(parents=True, exist_ok=True)
    download_filename = list(
        query_api(config.WHO_HDX_ADDRESS, WHO_dir,
                  resource_format='CSV').values())[0]
    final_filepath = os.path.join(WHO_dir, config.WHO_FILENAME)
    # os.replace overwrites an existing destination atomically; os.rename
    # raises FileExistsError on Windows when the target already exists,
    # which breaks re-runs of this download.
    os.replace(os.path.join(WHO_dir, download_filename), final_filepath)
    # Get the data for the country based on ISO3
    logger.info(f'Returning WHO data for {country_iso3}')
    df_WHO = pd.read_csv(final_filepath)
    df_WHO = df_WHO.loc[df_WHO['Country_code'] == Country.get_iso2_from_iso3(
        country_iso3)]
    # WHO data sometimes contains sudden jumps (e.g. reporting changes);
    # smoothing prevents these bumps distorting downstream model outcomes.
    if smooth_data:
        df_WHO = df_WHO.sort_values(by='Date_reported', ascending=True)
        for column_name in [
                'New_cases', 'Cumulative_cases', 'New_deaths',
                'Cumulative_deaths'
        ]:
            df_WHO[column_name] = df_WHO[column_name].rolling(
                window=n_days_smoothing).mean()
    if hxlize:
        df_WHO = df_WHO.rename(columns=HXL_DICT)
    return df_WHO
    def convert_pcode_length(self, countryiso3, pcode, scrapername=None):
        # type: (str, str, Optional[str]) ->  Optional[str]
        """Bring a pcode to the length its country uses and look it up internally.

        Args:
            countryiso3 (str): Iso3 country code
            pcode (str): P code for admin one
            scrapername (Optional[str]): Name of scraper for logging purposes. Defaults to None (don't log).

        Returns:
            Optional[str]: Matched P code or None if no match
        """
        if pcode in self.pcodes:
            return pcode
        pcode_length = len(pcode)
        target_length = self.pcode_lengths.get(countryiso3)
        # Only 4-6 character pcodes that differ from the country's standard
        # length are candidates for conversion
        if not target_length or pcode_length == target_length \
                or pcode_length < 4 or pcode_length > 6:
            return None
        if target_length == 4:
            # Shrink an ISO3 prefix down to ISO2 and keep a 2-digit suffix
            pcode = '%s%s' % (Country.get_iso2_from_iso3(
                pcode[:3]), pcode[-2:])
        elif target_length == 5:
            if pcode_length == 4:
                # Zero-pad the numeric part
                pcode = '%s0%s' % (pcode[:2], pcode[-2:])
            else:
                pcode = '%s%s' % (Country.get_iso2_from_iso3(
                    pcode[:3]), pcode[-3:])
        elif target_length == 6:
            # Expand an ISO2 prefix to ISO3, zero-padding short inputs
            iso3_prefix = Country.get_iso3_from_iso2(pcode[:2])
            if pcode_length == 4:
                pcode = '%s0%s' % (iso3_prefix, pcode[-2:])
            else:
                pcode = '%s%s' % (iso3_prefix, pcode[-3:])
        else:
            pcode = None
        if pcode not in self.pcodes:
            return None
        if scrapername:
            # Record the CONVERTED pcode for audit logging
            self.matches.add(
                (scrapername, countryiso3, pcode,
                 self.pcode_to_name[pcode], 'pcode length conversion'))
        return pcode
Example #5
0
def get_ipc(configuration, admininfo, downloader, scrapers=None):
    """Collect IPC P3+ food-insecurity percentages per admin-1 pcode.

    The first rows from get_data are matched to admin-1 areas (either by
    country name or by Area column — presumably depending on how the source
    reports the country; confirm against get_data). Where a pcode receives
    several percentages, they are combined as a population-weighted average.

    Returns a (headers, values, sources) triple, or three empty lists when a
    scraper filter is supplied that does not match this function's name.
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    percentages_by_pcode = dict()  # pcode -> list of 'Current Phase P3+ %'
    populations_by_pcode = dict()  # pcode -> list of 'Current Phase P3+ #'
    for countryiso3 in admininfo.countryiso3s:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, countryiso2)
        if not data:
            continue
        for row in data:
            country = row['Country']
            if adm1_names:
                # Source lists admin-1 names in the Country column
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                # Otherwise admin-1 names come from Area; skip national rows
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = admininfo.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            population = row['Current Phase P3+ #']
            if population:
                dict_of_lists_add(populations_by_pcode, pcode, population)
            percentage = row['Current Phase P3+ %']
            if percentage:
                dict_of_lists_add(percentages_by_pcode, pcode, percentage)
    # Collapse each pcode's list of percentages to a single value
    for pcode, pcts in percentages_by_pcode.items():
        if len(pcts) == 1:
            percentages_by_pcode[pcode] = get_fraction_str(pcts[0])
            continue
        pops = populations_by_pcode[pcode]
        weighted_sum = 0
        total_population = 0
        for i, pct in enumerate(pcts):
            weighted_sum += pops[i] * pct
            total_population += pops[i]
        percentages_by_pcode[pcode] = get_fraction_str(weighted_sum,
                                                       total_population)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset)
    hxltag = '#affected+food+ipc+p3+pct'
    return [['FoodInsecurityIPCP3+'], [hxltag]], [percentages_by_pcode], \
           [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url())]
def get_travel_restrictions(airports):
    """
    Scrapes trackcorona.live/api for covid-19 data on cities
    :param airports: a pandas DataFrame of airports data (ident, type, name, elevation, continent, iso country...)
    :return: a pandas DataFrame of iso_regions Covid-19 data (latitude, longitude, confirmed cases, dead, recovered)
    """
    travel = pd.DataFrame(
        requests.get(CFG.TRAVEL_URL).json()[CFG.DATA_COLUMN_NAME])

    def _to_iso2(location):
        # Fuzzy-resolve the free-text location once (the original code called
        # get_iso3_country_code_fuzzy twice per row), then convert to ISO2;
        # unresolvable locations become ''.
        iso3 = Country.get_iso3_country_code_fuzzy(location)[CFG.FIRST_ITEM]
        return Country.get_iso2_from_iso3(iso3) if iso3 else ''

    travel['location'] = [_to_iso2(x) for x in travel['location']]
    travel = travel.rename(columns={'location': 'iso_country'})

    return travel
Example #7
0
def get_ipc(configuration,
            today,
            gho_countries,
            adminone,
            downloader,
            scrapers=None):
    """Build national and subnational IPC food-insecurity population outputs.

    For each country, the first row returned by get_data is treated as the
    national record and the remaining rows as admin-1 areas, which are matched
    to pcodes via adminone and their phase populations summed per pcode.

    Args:
        configuration: Mapping with an 'ipc' section containing 'url' and 'dataset'.
        today: Reference date passed through to get_data, get_period and
            get_date_from_dataset_date.
        gho_countries: Iterable of ISO3 country codes to process.
        adminone: Admin-1 helper exposing get_pcode.
        downloader: Download object passed through to get_data.
        scrapers: Optional list of scraper-name fragments; if supplied and none
            matches this function's name, five empty lists are returned.

    Returns:
        Tuple of (national headers+hxltags, national outputs,
        subnational headers+hxltags, subnational outputs, source metadata).
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    phases = ['3', '4', '5', 'P3+']
    projections = ['Current', 'First Projection', 'Second Projection']
    # Per-phase {countryiso3: population} plus national metadata dicts
    national_populations = {phase: dict() for phase in phases}
    national_analysed = dict()
    national_period = dict()
    national_start = dict()
    national_end = dict()
    subnational_populations = {phase: dict() for phase in phases}
    for countryiso3 in gho_countries:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, today, countryiso2)
        if not data:
            continue
        # First row is the national record
        row = data[0]
        analysis_period, start, end = get_period(today, row, projections)
        for phase in phases:
            national_populations[phase][countryiso3] = row[
                f'{analysis_period} Phase {phase} #']
        national_analysed[countryiso3] = row['Current Population Analysed #']
        national_period[countryiso3] = analysis_period
        national_start[countryiso3] = start
        national_end[countryiso3] = end
        # Remaining rows are admin-1 areas
        for row in data[1:]:
            country = row['Country']
            if adm1_names:
                # Source lists admin-1 names in the Country column
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                # Otherwise names come from Area; skip national-level rows
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = adminone.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            for phase in phases:
                population = row[f'{analysis_period} Phase {phase} #']
                if population:
                    dict_of_lists_add(subnational_populations[phase], pcode,
                                      population)
    # Collapse each pcode's list of populations to a single total
    for phase in phases:
        subnational_population = subnational_populations[phase]
        for pcode in subnational_population:
            populations = subnational_population[pcode]
            if len(populations) == 1:
                subnational_population[pcode] = populations[0]
            else:
                # sum() replaces the original manual accumulation loop
                subnational_population[pcode] = sum(populations)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset, today=today)
    headers = [f'FoodInsecurityIPC{phase}' for phase in phases]
    headers.append('FoodInsecurityIPCAnalysedNum')
    headers.append('FoodInsecurityIPCAnalysisPeriod')
    headers.append('FoodInsecurityIPCAnalysisPeriodStart')
    headers.append('FoodInsecurityIPCAnalysisPeriodEnd')
    hxltags = [f'#affected+food+ipc+p{phase}+num' for phase in phases[:-1]]
    hxltags.append('#affected+food+ipc+p3plus+num')
    hxltags.append('#affected+food+ipc+analysed+num')
    hxltags.append('#date+ipc+period')
    hxltags.append('#date+ipc+start')
    hxltags.append('#date+ipc+end')
    # Output lists are index-aligned with headers/hxltags above
    national_outputs = [national_populations[phase] for phase in phases]
    national_outputs.append(national_analysed)
    national_outputs.append(national_period)
    national_outputs.append(national_start)
    national_outputs.append(national_end)
    subnational_outputs = [subnational_populations[phase] for phase in phases]
    return [headers, hxltags], national_outputs, [headers[:-4], hxltags[:-4]], subnational_outputs, \
           [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url()) for hxltag in hxltags]
Example #8
0
def get_ipc(configuration, admininfo, downloader, scrapers=None):
    """Build national and subnational IPC phase-percentage outputs.

    The first row returned by get_data is treated as the national record;
    remaining rows are admin-1 areas matched to pcodes via admininfo.
    Subnational percentages spread over several source rows are combined as a
    population-weighted average (the per-pcode percentage and population lists
    are index-aligned, so this relies on both fields being present per row).

    Args:
        configuration: Mapping with an 'ipc' section containing 'url' and 'dataset'.
        admininfo: Admin-1 helper exposing countryiso3s and get_pcode.
        downloader: Download object passed through to get_data.
        scrapers: Optional scraper-name fragments; if supplied and none matches
            this function's name, five empty lists are returned.

    Returns:
        Tuple of (headers+hxltags, national outputs, subnational headers+hxltags,
        subnational outputs, source metadata tuples).
    """
    name = inspect.currentframe().f_code.co_name
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list(), list(), list()
    ipc_configuration = configuration['ipc']
    url = ipc_configuration['url']
    phases = ['3', '4', '5', 'P3+']
    # Per-phase {countryiso3: %} nationally and {pcode: [values]} subnationally
    national_phases = {phase: dict() for phase in phases}
    national_analysed = dict()
    subnational_phases = {phase: dict() for phase in phases}
    subnational_populations = {phase: dict() for phase in phases}
    for countryiso3 in admininfo.countryiso3s:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        data, adm1_names = get_data(downloader, url, countryiso2)
        if not data:
            continue
        # First row is the national record
        row = data[0]
        for phase in phases:
            national_phases[phase][countryiso3] = row[
                f'Current Phase {phase} %']
        # NOTE: 'county' is a typo in the source data's column name — do not "fix" it
        national_analysed[
            countryiso3] = f'{row["Current Population Analysed % of total county Pop"]:.03f}'
        # Remaining rows are admin-1 areas
        for row in data[1:]:
            country = row['Country']
            if adm1_names:
                # Source lists admin-1 names in the Country column
                if country not in adm1_names:
                    continue
                adm1_name = country
            else:
                # Otherwise names come from Area; skip national-level rows
                adm1_name = row['Area']
                if not adm1_name or adm1_name == country:
                    continue
            pcode, _ = admininfo.get_pcode(countryiso3, adm1_name, 'IPC')
            if not pcode:
                continue
            for phase in phases:
                population = row[f'Current Phase {phase} #']
                if population:
                    dict_of_lists_add(subnational_populations[phase], pcode,
                                      population)
                percentage = row[f'Current Phase {phase} %']
                if percentage:
                    dict_of_lists_add(subnational_phases[phase], pcode,
                                      percentage)
    # Collapse each pcode's percentage list to one population-weighted value
    for phase in phases:
        subnational_phase = subnational_phases[phase]
        for pcode in subnational_phase:
            percentages = subnational_phase[pcode]
            if len(percentages) == 1:
                subnational_phase[pcode] = get_fraction_str(percentages[0])
            else:
                populations = subnational_populations[phase][pcode]
                numerator = 0
                denominator = 0
                for i, percentage in enumerate(percentages):
                    population = populations[i]
                    numerator += population * percentage
                    denominator += population
                subnational_phase[pcode] = get_fraction_str(
                    numerator, denominator)
    logger.info('Processed IPC')
    dataset = Dataset.read_from_hdx(ipc_configuration['dataset'])
    date = get_date_from_dataset_date(dataset)
    headers = [f'FoodInsecurityIPC{phase}' for phase in phases]
    headers.append('FoodInsecurityIPCAnalysed')
    hxltags = [f'#affected+food+ipc+p{phase}+pct' for phase in phases[:-1]]
    hxltags.append('#affected+food+ipc+p3plus+pct')
    hxltags.append('#affected+food+ipc+analysed+pct')
    # Output lists are index-aligned with headers/hxltags above
    national_outputs = [national_phases[phase] for phase in phases]
    national_outputs.append(national_analysed)
    subnational_outputs = [subnational_phases[phase] for phase in phases]
    return [headers, hxltags], national_outputs, [headers[:-1], hxltags[:-1]], subnational_outputs, \
           [(hxltag, date, dataset['dataset_source'], dataset.get_hdx_url()) for hxltag in hxltags]
Example #9
0
def generate_map(data,
                 country,
                 location,
                 html,
                 tree=None,
                 data_sep='\t',
                 id_index=0,
                 colours=None):
    """Render an HTML geo map colouring countries by a location annotation.

    Args:
        data: Path to the annotation table (read with pandas).
        country: Name of the column holding country names.
        location: Name of the column holding the location annotation.
        html: Output path for the rendered HTML page.
        tree: Optional tree file; if given, rows are restricted to tip names.
        data_sep: Field separator of the annotation table. Defaults to tab.
        id_index: Index of the id column used as the DataFrame index.
        colours: Optional colour specification parsed by parse_colours;
            otherwise a palette is generated per unique location.

    Raises:
        ValueError: If the country or location column is missing.
    """
    df = pd.read_csv(data, sep=data_sep, header=0, index_col=id_index)
    if country not in df.columns:
        raise ValueError(
            'The country column {} not found among the annotation columns: {}.'
            .format(country, df.columns))
    if location not in df.columns:
        raise ValueError(
            'The location column {} not found among the annotation columns: {}.'
            .format(location, df.columns))
    df.sort_values(by=[location], inplace=True, na_position='last')
    # First non-null location per country (rows are sorted by location above)
    ddf = df.drop_duplicates(subset=[country], inplace=False, keep='first')
    country2location = {
        c: l
        for c, l in zip(ddf[country], ddf[location])
        if not pd.isnull(c) and not pd.isnull(l)
    }
    if tree:
        # np.str was removed in NumPy 1.24; the builtin str is the
        # documented replacement and behaves identically here.
        df = df[np.in1d(df.index.astype(str),
                        [_.name for _ in read_tree(tree)])]
    unique_countries = {_ for _ in df[country].unique() if not pd.isnull(_)}
    if ISO_EXISTS:
        # Resolve country names fuzzily to ISO3, then to ISO2 map keys
        country2iso = {
            _: Country.get_iso2_from_iso3(iso)
            for (_, iso) in ((_, Country.get_iso3_country_code_fuzzy(_)[0])
                             for _ in country2location.keys())
            if iso and _ in unique_countries
        }
    else:
        # Fall back to escaped country names as map keys
        country2iso = {
            _: escape(_)
            for _ in country2location.keys() if _ in unique_countries
        }
    iso2num = {
        iso: len(df[df[country] == c])
        for c, iso in country2iso.items()
    }
    iso2loc = {iso: country2location[c] for c, iso in country2iso.items()}
    iso2loc_num = {
        iso: len(df[df[location] == loc])
        for iso, loc in iso2loc.items()
    }
    iso2tooltip = {
        iso: escape('{}: {} samples (out of {} in {})'.format(
            c, iso2num[iso], iso2loc_num[iso], iso2loc[iso]))
        for (c, iso) in country2iso.items()
    }
    locations = sorted([_ for _ in df[location].unique() if not pd.isnull(_)])
    num_unique_values = len(locations)
    if colours:
        colours = parse_colours(colours, locations)
    else:
        colours = get_enough_colours(num_unique_values)
    # One colour per unique location, shared by all its countries
    iso2colour = {
        iso: colours[locations.index(loc)]
        for iso, loc in iso2loc.items()
    }

    env = Environment(loader=PackageLoader('pastml'))
    template = env.get_template('geo_map.html')
    page = template.render(iso2colour=iso2colour,
                           colours=colours,
                           iso2tooltip=iso2tooltip)
    os.makedirs(os.path.abspath(os.path.dirname(html)), exist_ok=True)
    with open(html, 'w+') as fp:
        fp.write(page)