Example #1
def get_countries(countries_path, downloader):
    """Download a list of countries and provide mapping if necessary.
    A list of dictionaries is returned, each containing the following keys:
    iso3 - ISO 3 country code
    name - country name
    code - WFP country code
    """
    countries = list()

    headers, iterator = downloader.get_tabular_rows(countries_path,
                                                    headers=1,
                                                    dict_form=True,
                                                    format='csv')
    for row in iterator:
        wfp_name = row['ADM0_NAME']
        code = row['ADM0_CODE']
        iso3, _ = Country.get_iso3_country_code_fuzzy(wfp_name)
        if iso3 is None:
            continue
        countries.append({'iso3': iso3, 'name': wfp_name, 'code': code})

    return countries
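
A quick way to exercise get_countries without a real tabular download is a stub downloader; the class, path, and sample rows below are hypothetical and mimic only the get_tabular_rows contract used above.

# Hypothetical stub mimicking the downloader interface get_countries relies on.
class StubDownloader:
    def get_tabular_rows(self, path, headers=1, dict_form=True, format='csv'):
        rows = [{'ADM0_NAME': 'Afghanistan', 'ADM0_CODE': '1'},
                {'ADM0_NAME': 'abc', 'ADM0_CODE': '999'}]  # 'abc' has no ISO3 match and is skipped
        return ['ADM0_NAME', 'ADM0_CODE'], iter(rows)

print(get_countries('countries.csv', StubDownloader()))
# [{'iso3': 'AFG', 'name': 'Afghanistan', 'code': '1'}]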
def check_domain(ds: pd.DataFrame) -> Tuple[list, list]:
    """If the column domain is Country, check whether any column header is not recognized as a country name.

    Args:
        ds: the DataFrame whose column headers are checked

    Returns:
        A list of column headers not recognized as countries, and a list of ISO3 codes for those that are
    """
    not_a_country = []
    iso_list = []
    if ds.columns.name and 'country' in ds.columns.name.lower():
        for country in ds.columns:
            iso, fuzzy = Country.get_iso3_country_code_fuzzy(country,
                                                             use_live=False)
            if iso is None:
                country_type = utils.check_country_fsp(country)
                if country_type is None:
                    not_a_country.append(country)
                else:
                    not_a_country.append(
                        (country + " (Domain: " + country_type + ")"))
            else:
                iso_list.append(iso)
    return not_a_country, iso_list
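
A minimal sketch of check_domain in action; the DataFrame is hypothetical, its column index is named 'Country' so the check is triggered, and only recognized names are used so the utils.check_country_fsp branch is never reached.

import pandas as pd

ds = pd.DataFrame([[1, 2]], columns=pd.Index(['France', 'Germany'], name='Country'))
not_a_country, iso_list = check_domain(ds)
print(not_a_country, iso_list)  # [] ['FRA', 'DEU']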
Example #3
 def get_adm(admcol, i):
     template_string, match_string = match_template(admcol)
     if template_string:
         admcol = self.headers[int(match_string)]
     adm = row[admcol]
     if not adm:
         return False
     adm = adm.strip()
     adms[i] = adm
     if adm in self.adms[i]:
         return True
     exact = False
     if self.admexact:
         adms[i] = None
     else:
         if i == 0:
             adms[i], exact = Country.get_iso3_country_code_fuzzy(adm)
         elif i == 1:
             adms[i], exact = self.adminone.get_pcode(
                 adms[0], adm, scrapername)
         if adms[i] not in self.adms[i]:
             adms[i] = None
     return exact
 def get_adm(self, adms, admexact, i, scrapername):
     adm = adms[i]
     if adm in self.adms[i]:
         exact = True
     else:
         exact = False
         if admexact:
             adms[i] = None
         else:
             if i == 0:
                 mappingadm = self.adm_mappings[0].get(adm)
                 if mappingadm:
                     adms[i] = mappingadm
                     return True
                 adms[i], _ = Country.get_iso3_country_code_fuzzy(adm)
                 exact = False
             elif i == 1:
                 adms[i], exact = self.get_pcode(adms[0], adm, scrapername)
             else:
                 adms[i] = None
             if adms[i] not in self.adms[i]:
                 adms[i] = None
     return exact
def add_food_prices(configuration, countryiso3s, downloader, scrapers=None):
    name = 'food_prices'
    if scrapers and not any(scraper in name for scraper in scrapers):
        return list(), list(), list()
    datasetinfo = configuration[name]
    headers, iterator = read_hdx(downloader, datasetinfo)
    allowed_months = set()
    for i in range(1, 7):
        month = today.month - i
        if month > 0:
            allowed_months.add('%d/%d' % (today.year, month))
        else:
            # a zero or negative month wraps into the previous year
            allowed_months.add('%d/%d' % (today.year - 1, 12 + month))
    commods_per_country = dict()
    affected_commods_per_country = dict()
    for row in iterator:
        year_month = '%s/%s' % (row['Year'], row['Month'])
        if year_month not in allowed_months:
            continue
        countryiso, _ = Country.get_iso3_country_code_fuzzy(row['Country'])
        if not countryiso or countryiso not in countryiso3s:
            continue
        commods_per_country[countryiso] = commods_per_country.get(
            countryiso, 0) + 1
        if row['ALPS'] != 'Normal':
            affected_commods_per_country[
                countryiso] = affected_commods_per_country.get(countryiso,
                                                               0) + 1
    ratios = calculate_ratios(commods_per_country,
                              affected_commods_per_country)
    hxltag = '#value+food+num+ratio'
    logger.info('Processed WFP')
    return ([['Food Prices Ratio'], [hxltag]],
            [ratios],
            [(hxltag, datasetinfo['date'], datasetinfo['source'],
              datasetinfo['source_url'])])
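
The allowed_months loop above builds a rolling window of the six calendar months preceding the current one; here is a standalone sketch of the same arithmetic, with datetime.date.today() standing in for the module-level today.

import datetime

today = datetime.date.today()
allowed_months = set()
for i in range(1, 7):
    month = today.month - i
    if month > 0:
        allowed_months.add('%d/%d' % (today.year, month))
    else:
        allowed_months.add('%d/%d' % (today.year - 1, 12 + month))  # wraps into the previous year
print(sorted(allowed_months))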
Example #6
def generate_dataset_and_showcase(downloader,
                                  countrydata,
                                  endpoints_metadata,
                                  folder,
                                  merge_resources=True,
                                  single_dataset=False,
                                  split_to_resources_by_column="STAT_UNIT",
                                  remove_useless_columns=True):
    """
    https://api.uis.unesco.org/sdmx/data/UNESCO,DEM_ECO/....AU.?format=csv-:-tab-true-y&locale=en&subscription-key=...

    :param downloader: Downloader object
    :param countrydata: Country datastructure from UNESCO API
    :param endpoints_metadata: Endpoint datastructure from UNESCO API
    :param folder: temporary folder
    :param merge_resources: if true, merge resources for all time periods
    :param single_dataset: if true, put all endpoints into a single dataset
    :param split_to_resources_by_column: split data into multiple resources (csv) based on the value in the specified column
    :param remove_useless_columns: if true, drop unneeded columns before writing each csv
    :return: generator yielding (dataset, showcase) tuples. It may yield None, None.
    """
    countryiso2 = countrydata['id']
    countryname = countrydata['names'][0]['value']
    logger.info("Processing %s" % countryname)

    if countryname[:4] in ['WB: ', 'SDG:', 'MDG:', 'UIS:', 'EFA:'] or countryname[:5] in ['GEMR:', 'AIMS:'] or \
            countryname[:7] in ['UNICEF:', 'UNESCO:']:
        logger.info('Ignoring %s!' % countryname)
        yield None, None
        return

    countryiso3 = Country.get_iso3_from_iso2(countryiso2)

    if countryiso3 is None:
        countryiso3, _ = Country.get_iso3_country_code_fuzzy(countryname)
        if countryiso3 is None:
            logger.error('Cannot get iso3 code for %s!' % countryname)
            yield None, None
            return
        logger.info('Matched %s to %s!' % (countryname, countryiso3))

    earliest_year = 10000
    latest_year = 0

    if single_dataset:
        name = 'UNESCO indicators - %s' % countryname
        dataset, showcase = create_dataset_showcase(
            name,
            countryname,
            countryiso2,
            countryiso3,
            single_dataset=single_dataset)
        if dataset is None:
            return

    for endpoint in sorted(endpoints_metadata):
        time.sleep(0.2)
        indicator, structure_url, more_info_url, dimensions = endpoints_metadata[
            endpoint]
        structure_url = structure_url % countryiso2
        response = load_safely(downloader,
                               '%s%s' % (structure_url, dataurl_suffix))
        json = response.json()
        if not single_dataset:
            name = 'UNESCO %s - %s' % (json["structure"]["name"], countryname)
            dataset, showcase = create_dataset_showcase(
                name,
                countryname,
                countryiso2,
                countryiso3,
                single_dataset=single_dataset)
            if dataset is None:
                continue
        observations = json['structure']['dimensions']['observation']
        time_periods = dict()
        for observation in observations:
            if observation['id'] == 'TIME_PERIOD':
                for value in observation['values']:
                    time_periods[int(value['id'])] = value['actualObs']
        if len(time_periods) == 0:
            logger.warning('No time periods for endpoint %s for country %s!' %
                           (indicator, countryname))
            continue

        earliest_year = min(earliest_year, *time_periods.keys())
        latest_year = max(latest_year, *time_periods.keys())

        csv_url = '%sformat=csv' % structure_url

        description = more_info_url
        if description != ' ':
            description = '[Info on %s](%s)' % (indicator, description)
        description = 'To save, right click download button & click Save Link/Target As  \n%s' % description

        df = None
        for start_year, end_year in chunk_years(time_periods):
            if merge_resources:
                df1 = download_df(downloader, csv_url, start_year, end_year)
                if df1 is not None:
                    df = df1 if df is None else pd.concat([df, df1])
            else:
                url_years = '&startPeriod=%d&endPeriod=%d' % (start_year,
                                                              end_year)
                resource = {
                    'name': '%s (%d-%d)' % (indicator, start_year, end_year),
                    'description': description,
                    'format': 'csv',
                    'url':
                    downloader.get_full_url('%s%s' % (csv_url, url_years))
                }
                dataset.add_update_resource(resource)

        if df is not None:
            stat = {
                x["id"]: x["name"]
                for d in dimensions if d["id"] == "STAT_UNIT"
                for x in d["values"]
            }
            for value, df_part in split_df_by_column(
                    process_df(df), split_to_resources_by_column):
                filename = 'UNESCO_%s_%s.csv' % (
                    countryiso3,
                    endpoint + ('' if value is None else '_' + value))
                for char in ' :/,()':
                    filename = filename.replace(char, '-')
                file_csv = join(folder, filename)
                if remove_useless_columns:
                    df_part = remove_useless_columns_from_df(df_part)
                df_part["country-iso3"] = countryiso3
                df_part.iloc[0, df_part.columns.get_loc("country-iso3")] = "#country+iso3"
                df_part["Indicator name"] = value
                df_part.iloc[0, df_part.columns.get_loc("Indicator name")] = "#indicator+name"
                df_part = postprocess_df(df_part)
                df_part.to_csv(file_csv, index=False)
                description_part = stat.get(
                    value, 'Info on %s%s' %
                    ("" if value is None else value + " in ", indicator))
                resource = Resource({
                    'name': value,
                    'description': description_part
                })
                resource.set_file_type('csv')
                resource.set_file_to_upload(file_csv)
                dataset.add_update_resource(resource)

        if not single_dataset:
            if dataset is None or len(dataset.get_resources()) == 0:
                logger.error('No resources created for country %s, %s!' %
                             (countryname, endpoint))
            else:
                dataset.set_dataset_year_range(min(time_periods.keys()),
                                               max(time_periods.keys()))
                yield dataset, showcase

    if single_dataset:
        if dataset is None or len(dataset.get_resources()) == 0:
            logger.error('No resources created for country %s!' %
                         (countryname))
        else:
            dataset.set_dataset_year_range(earliest_year, latest_year)
            yield dataset, showcase
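
A hypothetical driver for the generator above; downloader, countrydata, endpoints_metadata and folder are assumed to have been prepared elsewhere, and the None, None yields are skipped as the docstring allows.

for dataset, showcase in generate_dataset_and_showcase(
        downloader, countrydata, endpoints_metadata, folder,
        merge_resources=True, single_dataset=False):
    if dataset is None:
        continue
    dataset.create_in_hdx()  # assumption: datasets are pushed with the usual HDX API call
    if showcase is not None:
        showcase.create_in_hdx()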
Example #7
 def test_get_iso3_country_code(self):
     assert Country.get_iso3_country_code('jpn', use_live=False) == 'JPN'
     assert Country.get_iso3_country_code('Dem. Rep. of the Congo',
                                          use_live=False) == 'COD'
     assert Country.get_iso3_country_code('Russian Fed.',
                                          use_live=False) == 'RUS'
     assert Country.get_iso3_country_code(
         'Micronesia (Federated States of)', use_live=False) == 'FSM'
     assert Country.get_iso3_country_code('Iran (Islamic Rep. of)',
                                          use_live=False) == 'IRN'
     assert Country.get_iso3_country_code('United Rep. of Tanzania',
                                          use_live=False) == 'TZA'
     assert Country.get_iso3_country_code('Syrian Arab Rep.',
                                          use_live=False) == 'SYR'
     assert Country.get_iso3_country_code('Central African Rep.',
                                          use_live=False) == 'CAF'
     assert Country.get_iso3_country_code('Rep. of Korea',
                                          use_live=False) == 'KOR'
     assert Country.get_iso3_country_code('St. Pierre and Miquelon',
                                          use_live=False) == 'SPM'
     assert Country.get_iso3_country_code('Christmas Isl.',
                                          use_live=False) == 'CXR'
     assert Country.get_iso3_country_code('Cayman Isl.',
                                          use_live=False) == 'CYM'
     assert Country.get_iso3_country_code('jp', use_live=False) == 'JPN'
     assert Country.get_iso3_country_code('Taiwan (Province of China)',
                                          use_live=False) == 'TWN'
     assert Country.get_iso3_country_code_fuzzy('jpn',
                                                use_live=False) == ('JPN',
                                                                    True)
     assert Country.get_iso3_country_code_fuzzy('ZWE',
                                                use_live=False) == ('ZWE',
                                                                    True)
     assert Country.get_iso3_country_code_fuzzy('Vut',
                                                use_live=False) == ('VUT',
                                                                    True)
     assert Country.get_iso3_country_code('abc', use_live=False) is None
     with pytest.raises(LocationError):
         Country.get_iso3_country_code('abc',
                                       use_live=False,
                                       exception=LocationError)
     assert Country.get_iso3_country_code_fuzzy('abc',
                                                use_live=False) == (None,
                                                                    False)
     with pytest.raises(LocationError):
         Country.get_iso3_country_code_fuzzy('abc',
                                             use_live=False,
                                             exception=LocationError)
     assert Country.get_iso3_country_code_fuzzy('United Kingdom',
                                                use_live=False) == ('GBR',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy(
         'United Kingdom of Great Britain and Northern Ireland',
         use_live=False) == ('GBR', True)
     assert Country.get_iso3_country_code_fuzzy('united states',
                                                use_live=False) == ('USA',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('united states of america',
                                                use_live=False) == ('USA',
                                                                    True)
     assert Country.get_iso3_country_code('UZBEKISTAN',
                                          use_live=False) == 'UZB'
     assert Country.get_iso3_country_code_fuzzy('UZBEKISTAN',
                                                use_live=False) == ('UZB',
                                                                    True)
     assert Country.get_iso3_country_code('Sierra', use_live=False) is None
     assert Country.get_iso3_country_code_fuzzy('Sierra',
                                                use_live=False) == ('SLE',
                                                                    False)
     assert Country.get_iso3_country_code('Venezuela',
                                          use_live=False) is None
     assert Country.get_iso3_country_code_fuzzy('Venezuela',
                                                use_live=False) == ('VEN',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Heard Isl.',
                                                use_live=False) == ('HMD',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Falkland Isl.',
                                                use_live=False) == ('FLK',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Czech Republic',
                                                use_live=False) == ('CZE',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Czech Rep.',
                                                use_live=False) == ('CZE',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Islamic Rep. of Iran',
                                                use_live=False) == ('IRN',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Dem. Congo',
                                                use_live=False) == ('COD',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Congo, Republic of',
                                                use_live=False) == ('COG',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Republic of the Congo',
                                                use_live=False) == ('COG',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Vietnam',
                                                use_live=False) == ('VNM',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('South Korea',
                                                use_live=False) == ('KOR',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Korea Republic',
                                                use_live=False) == ('KOR',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Dem. Republic Korea',
                                                use_live=False) == ('PRK',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('North Korea',
                                                use_live=False) == ('PRK',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy(
         'Serbia and Kosovo: S/RES/1244 (1999)',
         use_live=False) == ('SRB', False)
     assert Country.get_iso3_country_code_fuzzy('U.S. Virgin Islands',
                                                use_live=False) == ('VIR',
                                                                    True)
     assert Country.get_iso3_country_code_fuzzy('U.K. Virgin Islands',
                                                use_live=False) == ('VGB',
                                                                    False)
     assert Country.get_iso3_country_code_fuzzy('Taiwan',
                                                use_live=False) == ('TWN',
                                                                    False)
     with pytest.raises(ValueError):
         Country.get_iso3_country_code('abc',
                                       use_live=False,
                                       exception=ValueError)
     with pytest.raises(ValueError):
         Country.get_iso3_country_code_fuzzy('abc',
                                             use_live=False,
                                             exception=ValueError)
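
The contract these tests pin down: get_iso3_country_code returns a bare code or None, while the fuzzy variant always returns a (code, exact) pair whose second element is False for fuzzy matches. A compact recap using values from the assertions above:

from hdx.location.country import Country

assert Country.get_iso3_country_code('jpn', use_live=False) == 'JPN'
assert Country.get_iso3_country_code_fuzzy('Czech Rep.', use_live=False) == ('CZE', False)  # fuzzy match
assert Country.get_iso3_country_code_fuzzy('abc', use_live=False) == (None, False)          # no match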
Example #8
def main():
    configuration = Configuration.read()
    enddays = configuration['enddays']
    ignore_users = configuration['ignore_users']
    users_scrapers = configuration['users_scrapers']
    spreadsheet_url = configuration['spreadsheet_url']
    sheetname = configuration['sheetname']
    logger.info('> GSheet Credentials: %s' % gsheet_auth)
    users = dict()
    info = json.loads(gsheet_auth)
    scopes = ['https://www.googleapis.com/auth/spreadsheets']
    credentials = service_account.Credentials.from_service_account_info(info, scopes=scopes)
    gc = pygsheets.authorize(custom_credentials=credentials)
    spreadsheet = gc.open_by_url(spreadsheet_url)
    sheet = spreadsheet.worksheet_by_title(sheetname)
    keys = sheet.get_row(1)
    rows = [keys]
    crisisdata = configuration['crisisdata']
    for crisis in crisisdata:
        data = crisisdata[crisis]
        startdate = parse_date(data['startdate'])
        enddate = startdate + timedelta(days=enddays)
        searchlist = list()
        for country in data.get('countries', list()):
            iso3, _ = Country.get_iso3_country_code_fuzzy(country)
            searchlist.append('groups:%s' % iso3.lower())
        for tag in data.get('tags', list()):
            searchlist.append('vocab_Topics:"%s"' % tag.lower())
        search_string = 'metadata_created:[2000-01-01T00:00:00.000Z TO %sZ] AND (%s)' % (enddate.isoformat(), ' OR '.join(searchlist))
        datasets = Dataset.search_in_hdx(fq=search_string)
        row = {'ID': data['id'], 'Crisis name': crisis}
        count = 0
        largest_activities = 0
        for dataset in datasets:
            metadata_created_str = dataset['metadata_created']
            orgname = dataset['organization']['name']
            metadata_created = parse_date(metadata_created_str)
            new_or_updated = 'new'
            updated_when = ''
            updated_by = ''
            # if metadata_created < startdate:
            #     activities = Activity.get_all_activities(id=dataset['id'], limit=10000)
            #     activities_len = len(activities)
            #     if activities_len > largest_activities:
            #         largest_activities = activities_len
            #     found = False
            #     for activity in activities:
            #         timestamp = activity['timestamp']
            #         activity_date = parse_date(timestamp)
            #         if startdate < activity_date < enddate:
            #             new_or_updated = 'updated'
            #             updated_when = timestamp
            #             user_id = activity['user_id']
            #             check_ignore = True
            #             for user_scrapers in users_scrapers:
            #                 if user_id == user_scrapers['id']:
            #                     if orgname in user_scrapers['scrapers']:
            #                         check_ignore = False
            #                         break
            #             if check_ignore:
            #                 if user_id in ignore_users:
            #                     continue
            #             username = users.get(user_id)
            #             if username is None:
            #                 user = User.read_from_hdx(user_id)
            #                 username = get_user_name(user)
            #                 users[user_id] = username
            #             updated_by = username
            #             found = True
            #             break
            #     if not found:
            #         continue
            row['dataset title'] = dataset['title']
            row['dataset id'] = dataset['id']
            row['dataset url'] = dataset.get_hdx_url()
            row['org name'] = orgname
            row['org id'] = dataset['organization']['id']
            row['created'] = metadata_created_str
            row['new or updated'] = new_or_updated
            row['updated when'] = updated_when
            row['updated by'] = updated_by
            rows.append([row.get(key, '') for key in keys])
            count += 1
        logger.info('%s: %d\t%s' % (crisis, count, search_string))
    sheet.clear()
    sheet.update_values('A1', rows)
    logger.info('Longest activities: %d' % largest_activities)
Example #9
country2loc = {
    #
    'Botswana': 'Southern Africa ex SA',
    'Malawi': 'Southern Africa ex SA',
    'Mozambique': 'Southern Africa ex SA',
    'Zimbabwe': 'Southern Africa ex SA',
    #
    'Cameroon': 'West Africa',
    'Equatorial Guinea': 'West Africa',
    'Gabon': 'West Africa',
    'Mali': 'West Africa',
    'Nigeria': 'West Africa',
    'Senegal': 'West Africa'
}

iso32loc = {
    Country.get_iso3_country_code_fuzzy(c)[0]: loc
    for (c, loc) in country2loc.items()
    if Country.get_iso3_country_code_fuzzy(c)[0]
}
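
The comprehension above runs the fuzzy lookup twice per country name; a single-lookup sketch using an assignment expression (assumes Python 3.8+):

iso32loc = {
    iso3: loc
    for (c, loc) in country2loc.items()
    if (iso3 := Country.get_iso3_country_code_fuzzy(c)[0])
}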

if '__main__' == __name__:
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument('--drm_tab', required=True, type=str)
    parser.add_argument('--input_data', required=True, type=str)
    parser.add_argument('--output_data', required=True, type=str)
    params = parser.parse_args()

    df = pd.read_csv(params.drm_tab, index_col=0, header=0, sep='\t')
Example #10
def generate_map(data,
                 country,
                 location,
                 html,
                 tree=None,
                 data_sep='\t',
                 id_index=0,
                 colours=None):
    df = pd.read_csv(data, sep=data_sep, header=0, index_col=id_index)
    if country not in df.columns:
        raise ValueError(
            'The country column {} was not found among the annotation columns: {}.'
            .format(country, df.columns))
    if location not in df.columns:
        raise ValueError(
            'The location column {} was not found among the annotation columns: {}.'
            .format(location, df.columns))
    df.sort_values(by=[location], inplace=True, na_position='last')
    ddf = df.drop_duplicates(subset=[country], inplace=False, keep='first')
    country2location = {
        c: l
        for c, l in zip(ddf[country], ddf[location])
        if not pd.isnull(c) and not pd.isnull(l)
    }
    if tree:
        df = df[np.isin(df.index.astype(str),
                        [_.name for _ in read_tree(tree)])]
    unique_countries = {_ for _ in df[country].unique() if not pd.isnull(_)}
    if ISO_EXISTS:
        country2iso = {
            _: Country.get_iso2_from_iso3(iso)
            for (_, iso) in ((_, Country.get_iso3_country_code_fuzzy(_)[0])
                             for _ in country2location.keys())
            if iso and _ in unique_countries
        }
    else:
        country2iso = {
            _: escape(_)
            for _ in country2location.keys() if _ in unique_countries
        }
    iso2num = {
        iso: len(df[df[country] == c])
        for c, iso in country2iso.items()
    }
    iso2loc = {iso: country2location[c] for c, iso in country2iso.items()}
    iso2loc_num = {
        iso: len(df[df[location] == loc])
        for iso, loc in iso2loc.items()
    }
    iso2tooltip = {
        iso: escape('{}: {} samples (out of {} in {})'.format(
            c, iso2num[iso], iso2loc_num[iso], iso2loc[iso]))
        for (c, iso) in country2iso.items()
    }
    locations = sorted([_ for _ in df[location].unique() if not pd.isnull(_)])
    num_unique_values = len(locations)
    if colours:
        colours = parse_colours(colours, locations)
    else:
        colours = get_enough_colours(num_unique_values)
    iso2colour = {
        iso: colours[locations.index(loc)]
        for iso, loc in iso2loc.items()
    }

    env = Environment(loader=PackageLoader('pastml'))
    template = env.get_template('geo_map.html')
    page = template.render(iso2colour=iso2colour,
                           colours=colours,
                           iso2tooltip=iso2tooltip)
    os.makedirs(os.path.abspath(os.path.dirname(html)), exist_ok=True)
    with open(html, 'w+') as fp:
        fp.write(page)
def get_camp_non_camp_populations(noncamp_types, camp_types, camp_overrides,
                                  datasets, downloader):
    noncamp_types = noncamp_types.split(',')
    camp_types = camp_types.split(',')
    dataset_unhcr = None
    latest_date = None
    for dataset in datasets:
        if 'displacement' in dataset['title'].lower():
            date = dataset.get_dataset_date_as_datetime()
            if latest_date is None or date > latest_date:
                dataset_unhcr = dataset
                latest_date = date
    if dataset_unhcr is None:
        raise ValueError('No UNHCR dataset found!')
    url = dataset_unhcr.get_resources()[0]['url']
    country_ind = 0  # assume first column contains country
    iso3 = None
    row = None
    prev_row = None
    all_camps_per_country = dict()
    unhcr_non_camp = dict()
    unhcr_camp = dict()
    unhcr_camp_excluded = dict()
    rowiter = downloader.get_tabular_rows(url, sheet='Tab15')
    for row in rowiter:
        country = row[country_ind]
        iso3 = Country.get_iso3_country_code(country)
        if iso3 is not None:
            break
        prev_row = row
    accommodation_ind = None
    location_ind = None
    population_ind = None
    population = None
    for i, text in enumerate(prev_row):
        header = text.lower()
        value = row[i]
        if 'accommodation' in header:
            accommodation_ind = i
        elif 'location' in header and len(value) > 1:
            location_ind = i
        else:
            try:
                population = int(value)
                population_ind = i
                break
            except ValueError:
                pass
    campname = row[location_ind]

    def get_accommodation_type(name):
        accom_type = camp_overrides['Accommodation Type'].get(name)
        if accom_type is None:
            accom_type = row[accommodation_ind]
        else:
            logger.info('Overriding accommodation type to %s for %s' %
                        (accom_type, name))
        return accom_type.lower()

    accommodation_type = get_accommodation_type(campname)

    def match_camp_types(name, accom_type, pop, iso):
        if check_name_dispersed(name):
            accom_type = noncamp_types[0]
        found_camp_type = None
        for camp_type in camp_types:
            if camp_type in accom_type:
                found_camp_type = camp_type
                unhcr_camp[name] = pop, iso, found_camp_type
                break
        for noncamp_type in noncamp_types:
            if noncamp_type in accom_type:
                found_camp_type = noncamp_type
                append_value(unhcr_non_camp, iso, found_camp_type, name, pop)
                break
        if found_camp_type is None:
            append_value(unhcr_camp_excluded, iso, accom_type, name, pop)
            append_value(all_camps_per_country, iso, accom_type, name, pop)
        else:
            append_value(all_camps_per_country, iso, found_camp_type, name,
                         pop)

    match_camp_types(campname, accommodation_type, population, iso3)
    for row in rowiter:
        country = row[country_ind]
        if not country:
            continue
        if 'NOTES' in country.upper():
            break
        iso3, match = Country.get_iso3_country_code_fuzzy(country)
        if iso3 is None:
            logger.warning('Country %s could not be matched to ISO3 code!' %
                           country)
            continue
        else:
            if match is False:
                logger.info('Matched %s to ISO3: %s!' % (country, iso3))
        campname = row[location_ind]
        accommodation_type = get_accommodation_type(campname)
        population = int(row[population_ind])
        match_camp_types(campname, accommodation_type, population, iso3)

    for campname in sorted(camp_overrides['Population']):
        if campname in unhcr_camp:
            continue
        iso3 = camp_overrides['Country'][campname]
        accommodation_type = camp_overrides['Accommodation Type'][
            campname].lower()
        population = camp_overrides['Population'][campname]
        logger.info('Adding camp from override: %s (%s, %s): %d' %
                    (campname, iso3, accommodation_type, population))
        match_camp_types(campname, accommodation_type, population, iso3)

    return all_camps_per_country, unhcr_non_camp, unhcr_camp, unhcr_camp_excluded
def get_iso3(name):
    iso3, match = Country.get_iso3_country_code_fuzzy(name,
                                                      exception=ValueError)
    if not match:
        logger.info('Country %s matched to ISO3: %s!' % (name, iso3))
    return iso3
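
As Example #7 shows, both lookups also accept an exception= argument, which lets a caller fail fast instead of checking for None; a minimal sketch:

from hdx.location.country import Country

try:
    Country.get_iso3_country_code_fuzzy('abc', use_live=False, exception=ValueError)
except ValueError:
    print('no ISO3 match for abc')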
Example #13
def get_iso3(_):
    iso3 = Country.get_iso3_country_code_fuzzy(_)[0]
    return iso3 if iso3 else _
def generate_dataset(dataset_id,
                     configuration,
                     downloader,
                     output_failures=False):
    metadata_url = configuration["metadata_url"] % dataset_id
    response = downloader.download(
        f"{configuration['base_url']}{metadata_url}")
    json = response.json()
    study_desc = json["study_desc"]
    title_statement = study_desc["title_statement"]
    title = title_statement["title"]
    logger.info(f"Creating dataset: {title}")
    study_info = study_desc["study_info"]
    data_collection = study_desc["method"]["data_collection"]
    sources = [x["name"] for x in study_desc["authoring_entity"]]
    methodology = list()
    data_kind = study_info.get("data_kind")
    if data_kind is not None:
        methodology.append(f"Kind of Data: {data_kind}  \n")
    unit_analysis = study_info.get("universe")
    if unit_analysis is None:
        unit_analysis = study_info.get("analysis_unit")
    if unit_analysis is not None:
        methodology.append(f"Unit of Analysis: {unit_analysis}  \n")
    sampling = data_collection.get("sampling_procedure")
    if sampling is not None:
        methodology.append(f"Sampling Procedure: {sampling}  \n")
    collection = data_collection.get("coll_mode")
    if collection is not None:
        methodology.append(f"Data Collection Mode: {collection}  \n")
    dataset_name = slugify(title_statement["idno"])
    countryiso3s = set()
    for nation in study_info["nation"]:
        countryiso3 = nation["abbreviation"]
        if not countryiso3:
            countryname = nation["name"]
            if countryname:
                countryiso3, _ = Country.get_iso3_country_code_fuzzy(
                    countryname)
        if countryiso3:
            countryiso3s.add(countryiso3)
    if len(countryiso3s) == 1:
        countryname = Country.get_country_name_from_iso3(min(countryiso3s))
        title = f"{countryname} - {title}"
    dataset = Dataset({
        "name": dataset_name,
        "title": title,
        "notes": study_info["abstract"],
        "dataset_source": ", ".join(sources),
        "methodology": "Other",
        "methodology_other": "".join(methodology),
    })
    dataset.set_maintainer("ac47b0c8-548b-4c37-a685-7377e75aad55")
    dataset.set_organization("abf4ca86-8e69-40b1-92f7-71509992be88")
    dataset.set_expected_update_frequency("Never")
    dataset.set_subnational(True)
    if output_failures:
        try:
            dataset.add_country_locations(countryiso3s)
        except HDXError:
            ui_url = configuration["ui_url"] % dataset_id
            url = f"{configuration['base_url']}{ui_url}"
            failures.append(
                f"Invalid country id {countryiso3s} in dataset {url}!")
            return None
    else:
        dataset.add_country_locations(countryiso3s)
    tags = list()

    def add_tags(inwords, key):
        for inword in inwords:
            inword = inword[key].strip().lower()
            if "," in inword:
                words = inword.split(",")
            elif "/" in inword:
                words = inword.split("/")
            else:
                words = [inword]
            newwords = list()
            for innerword in words:
                if "and" in innerword:
                    newwords.extend(innerword.split(" and "))
                elif "&" in innerword:
                    newwords.extend(innerword.split(" & "))
                elif "other" in innerword:
                    newwords.extend(innerword.split("other"))
                else:
                    newwords.append(innerword)
            for word in newwords:
                word = word.strip()
                if word:
                    tags.append(word.strip())

    add_tags(study_info["topics"], "topic")
    add_tags(study_info.get("keywords", list()), "keyword")
    dataset.add_tags(tags)
    dataset.clean_tags()
    coll_dates = study_info["coll_dates"][0]
    startdate, _ = parse_date_range(coll_dates["start"])
    _, enddate = parse_date_range(coll_dates["end"])
    dataset.set_date_of_dataset(startdate, enddate)

    auth_url = configuration["auth_url"] % dataset_id
    resourcedata = {
        "name": title,
        "description":
        'Clicking "Download" leads outside HDX where you can request access to the data in csv, xlsx & dta formats',
        "url": f"{configuration['base_url']}{auth_url}",
        "format": "web app",
    }
    dataset.add_update_resource(resourcedata)

    documentation_url = configuration["documentation_url"] % dataset_id
    resourcedata = {
        "name": "Codebook",
        "description":
        "Contains information about the dataset's metadata and data",
        "url": f"{configuration['base_url']}{documentation_url}",
        "format": "pdf",
    }
    dataset.add_update_resource(resourcedata)

    return dataset
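
A hypothetical driver for generate_dataset; the dataset id is a placeholder, and configuration and downloader are assumed to be set up elsewhere (e.g. from Configuration.read() and a shared Download session).

dataset = generate_dataset('230', configuration, downloader, output_failures=True)
if dataset is not None:
    dataset.create_in_hdx()  # assumption: the usual HDX push call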
Example #15
# #30DayMapChallenge
# Day 26: New Tool -> Python
# Fragile States Index
# Data: https://fragilestatesindex.org/excel/
# Author: Stephanie Orellana (@sporella)

import pandas as pd
import geopandas as gp
import matplotlib.pyplot as plt
from hdx.location.country import Country
from pyproj import CRS

df = pd.read_excel("data/fsi-2020.xlsx")
df['iso_a3'] = df.apply(
    lambda row: Country.get_iso3_country_code_fuzzy(row["Country"])[0], axis=1)
world = gp.read_file(gp.datasets.get_path('naturalearth_lowres'))
world_dat = world.merge(df, on='iso_a3', how="left")

gdf = world_dat.to_crs(CRS("ESRI:54009"))

plt.rcParams.update({
    "text.color": "black",
    "axes.facecolor": "black",
    "axes.edgecolor": "black",
    "axes.labelcolor": "white",
    "xtick.color": "white",
    "ytick.color": "white",
    "grid.color": "lightgray",
    "figure.facecolor": "black",
    "figure.edgecolor": "black",
    "savefig.facecolor": "black",