Example #1
0
def eez(country_shapes):
    """Return the EEZ geometries of the configured countries.

    Reads the EEZ dataset from ``snakemake.input.eez``, keeps the rows whose
    ``ISO_3digit`` code belongs to one of the ISO2 codes listed in
    ``snakemake.config['countries']``, simplifies every geometry with
    ``_simplify_polys(..., filterremote=False)`` and finally drops every
    geometry that is not within ``1e-3`` (in the units of the geometries)
    of the matching entry in ``country_shapes``.

    Parameters
    ----------
    country_shapes : mapping
        Indexable by ISO2 country code; each value must be accepted by
        ``geometry.distance(...)``.

    Returns
    -------
    geopandas.GeoSeries
        Simplified EEZ geometries keyed by ISO2 country code.
    """
    countries_iso2 = snakemake.config['countries']
    countries_iso3 = frozenset(countrycode(countries_iso2, origin='iso2c', target='iso3c'))

    df = gpd.read_file(snakemake.input.eez)
    # Copy the filtered rows so that adding the 'name' column below writes to
    # an independent frame instead of a slice of the full dataset.
    df = df.loc[df['ISO_3digit'].isin(countries_iso3)].copy()
    df['name'] = countrycode(df['ISO_3digit'], origin='iso3c', target='iso2c')

    simplified = df.set_index('name').geometry.map(
        lambda geom: _simplify_polys(geom, filterremote=False))

    # Series.iteritems() was removed in pandas 2.0; items() is the equivalent.
    return gpd.GeoSeries({name: geom
                          for name, geom in simplified.items()
                          if geom.distance(country_shapes[name]) < 1e-3})
Example #2
0
def parse_Geoposition(loc, country=None, return_Country=False):
    """
    Nominatim request for the Geoposition of a specific location in a country.
    Returns a tuple with (latitude, longitude) if the request was successful,
    returns None otherwise.

    ToDo:   There exist further online sources for lat/long data which could be
            used, if this one fails, e.g.
        - Google Geocoding API
        - Yahoo! Placefinder
        - https://askgeo.com (??)

    Parameters
    ----------
    loc : string
        description of the location, can be city, area etc.
        ``None`` and float values are treated as "no location" and yield
        ``None`` without issuing a request.
    country : string, optional
        name of the country which will be used as a bounding area; when
        omitted no country bias is applied.
    return_Country : bool
        if True, return the last comma-separated component of the geocoded
        address instead of the coordinates.

    Returns
    -------
    tuple of (latitude, longitude), string, or None

    """
    # The original test ``loc != float`` compared against the *type* object and
    # was therefore always true.
    # NOTE(review): assumed intent is to skip float placeholders such as NaN
    # coming from tabular data -- confirm with callers.
    if loc is None or isinstance(loc, float):
        return None

    # Imported lazily so that missing input does not require geopy at all.
    from geopy.geocoders import Nominatim

    # Only translate the country name when one was actually supplied.
    if country is not None:
        country = countrycode(codes=[country], origin='country_name', target='iso2c')[0]

    gdata = Nominatim(timeout=500, country_bias=country).geocode(loc)
    if gdata is None:
        return None
    if return_Country:
        return gdata.address.split(', ')[-1]
    return (gdata.latitude, gdata.longitude)
Example #3
0
    def get_country(self, row, path="#country", return_default=True):
        """Return an upper-cased country identifier for ``row``.

        Resolution order:

        1. ``row[path + "+identifier"]`` if that key exists and its value is
           truthy.
        2. Otherwise, if ``path`` is a key of ``row``: the cached value for
           ``row[path]`` in ``self.country_cache``, or a fresh ``countrycode``
           lookup (country name -> iso2c). When the lookup result is not two
           characters long, ``self.clean_string(row[path]).strip()`` is used
           instead. Either result is stored in the cache.
        3. Otherwise ``self.default_country`` when ``return_default`` is
           true, else the literal ``"unknown"``.

        Parameters
        ----------
        row : mapping
            Record to read the country from.
        path : str
            Key holding the country name; ``path + "+identifier"`` holds an
            explicit identifier.
        return_default : bool
            Selects the fallback used in step 3.

        Returns
        -------
        str
            The resolved value, upper-cased.
        """
        identifier_key = path + "+identifier"
        if identifier_key in row:
            identifier = row[identifier_key]
            if identifier:
                return identifier.upper()
            # A falsy identifier (e.g. None or "") carries no information.
            # The original went on to call len() on it, which raised
            # TypeError for None; fall through to the name lookup instead.

        if path not in row:
            fallback = self.default_country if return_default else "unknown"
            return fallback.upper()

        raw_name = row[path]
        if raw_name in self.country_cache:
            return self.country_cache[raw_name].upper()

        country = countrycode(codes=[raw_name],
                              origin='country_name',
                              target="iso2c")[0]
        if len(country) != 2:
            # Not a two-letter result: keep a cleaned version of the raw
            # value so the same input maps consistently.
            country = self.clean_string(raw_name).strip()
        self.country_cache[raw_name] = country

        return country.upper()
Example #4
0
def eez(subset=None, filter_remote=True, tolerance=0.03):
    """Load simplified EEZ shapes for the ISO2 country codes in ``subset``.

    Records of ``World_EEZ/World_EEZ_v8_2014.shp`` whose ``ISO_3digit``
    property matches one of the requested countries are converted with
    ``shape`` and simplified with ``simplify_poly`` using ``tolerance``.

    With ``filter_remote`` set, only shapes within ``1e-3`` of the matching
    entry of ``countries(subset)`` are kept and the result is sorted by
    index; otherwise all shapes are returned in file order.

    Returns a ``pandas.Series`` of shapes indexed by ISO2 code.
    """
    wanted_iso3 = frozenset(countrycode(subset, origin='iso2c', target='iso3c'))

    iso3_codes = []
    geometries = []
    with fiona.drivers(), fiona.open(toDataDir('World_EEZ/World_EEZ_v8_2014.shp')) as records:
        for record in records:
            iso3 = record['properties']['ISO_3digit']
            if iso3 not in wanted_iso3:
                continue
            iso3_codes.append(iso3)
            geometries.append(simplify_poly(shape(record['geometry']), tolerance=tolerance))

    iso2_codes = countrycode(iso3_codes, origin='iso3c', target='iso2c')

    if not filter_remote:
        return pd.Series(geometries, index=iso2_codes)

    country_shapes = countries(subset)
    # Keep only the zones that (almost) touch their own country's shape.
    nearby = {code: geom
              for code, geom in zip(iso2_codes, geometries)
              if geom.distance(country_shapes[code]) < 1e-3}
    return pd.Series(nearby).sort_index()
Example #5
0
 def get_ranges(self):
     """Download the aggregated IP ranges for ``self.country`` from ipdeny.

     The country name is converted to an ISO2 code, which selects the zone
     file to fetch. A response body containing the text ``'title'`` is
     treated as a failure (presumably an HTML error page -- confirm).

     Sets ``self.ranges`` to the non-blank lines of the response, or to an
     empty list on failure. Returns ``True`` on success, ``False`` otherwise.
     """
     iso2 = countrycode.countrycode(codes=[self.country],
                                    origin='country_name',
                                    target='iso2c')[0]
     zone_url = (
         'http://www.ipdeny.com/ipblocks/data/aggregated/{}-aggregated.zone'
         .format(iso2.lower()))
     body = requests.get(zone_url).text

     found = 'title' not in body
     if found:
         self.ranges = [line for line in body.split('\n') if line.strip()]
     else:
         self.ranges = []
     return found
Example #6
0
def normalizeCountry(country_str, target="iso3c", title_case=False):
    '''Normalize the country given in ``country_str`` to ``target``.

    ``country_str`` may be an ISO code or a country name. Two- and
    three-character inputs are first tried as ISO2 / ISO3 codes
    (upper-cased) and fall back to a name lookup when that yields nothing;
    anything else is looked up as a name directly. ``target`` selects the
    output scheme ('iso3c' by default; 'country_name' and 'iso2c' are also
    common, see ``countrycode.countrycode``).

    Returns an empty string for ``None``. Raises ``ValueError`` if the
    country is unrecognized. With ``title_case`` the result is passed
    through ``str.title()``.'''
    if country_str is None:
        return u''

    name_origin = "country_name"
    code_origin_by_length = {2: "iso2c", 3: "iso3c"}

    cc = None
    code_origin = code_origin_by_length.get(len(country_str))
    if code_origin is not None:
        cc = countrycode(country_str.upper(), origin=code_origin, target=target)
    if not cc:
        cc = countrycode(country_str, origin=name_origin, target=target)

    # A name lookup hands back its input unchanged when nothing matches, so
    # the candidate is validated by converting it from ``target`` to itself.
    if cc:
        cc = countrycode(cc, origin=target, target=target)
    else:
        cc = None
    if not cc:
        raise ValueError("Country not found: %s" % (country_str))

    if title_case:
        return cc.title()
    return cc
Example #7
0
def normalizeCountry(country_str, target="iso3c", title_case=False):
    """Return the country in ``country_str`` expressed in the ``target`` scheme.

    The input can be a code or a name. Inputs of length 2 or 3 are tried as
    upper-cased ISO2 / ISO3 codes before falling back to a name lookup; all
    other inputs are treated as names. The default ``target`` is the
    3 character ISO code ('iso3c'); 'country_name' and 'iso2c' are common
    alternatives (see ``countrycode.countrycode`` for more).

    ``None`` yields an empty string. An unrecognized country raises
    ``ValueError``. ``title_case`` applies ``str.title()`` to the result.
    """
    if country_str is None:
        return ''

    def lookup(value, origin):
        return countrycode(value, origin=origin, target=target)

    length = len(country_str)
    if length == 2:
        cc = lookup(country_str.upper(), "iso2c") or lookup(country_str, "country_name")
    elif length == 3:
        cc = lookup(country_str.upper(), "iso3c") or lookup(country_str, "country_name")
    else:
        cc = lookup(country_str, "country_name")

    # The name lookup echoes unmatched input, so confirm the candidate is a
    # real ``target`` value by round-tripping it through the same scheme.
    cc = lookup(cc, target) if cc else None
    if not cc:
        raise ValueError("Country not found: %s" % (country_str))

    return cc.title() if title_case else cc
Example #8
0
    def get_country_code_from_name(self, name):
        """Return an upper-cased code for the country called ``name``.

        Results are memoized in ``self.country_cache``. On a cache miss the
        name is converted to ISO2 with ``countrycode``; if the result is not
        two characters long, ``random_string()`` supplies a substitute. The
        upper-cased value is cached under ``name`` and returned.
        """
        try:
            cached = self.country_cache[name]
        except KeyError:
            pass
        else:
            return cached.upper()

        code = countrycode(codes=[name],
                           origin='country_name',
                           target="iso2c")[0]
        if len(code) != 2:
            # No two-letter code came back; fall back to a generated value.
            code = random_string()
        self.country_cache[name] = code.upper()

        return code.upper()
Example #9
0
def country_input(require_present=True, string=None):
    """Prompt until the user names a usable delegation and return its index.

    The entered text is converted from a country name to an upper-cased ISO2
    code and matched against ``delegation.country_code`` for the entries of
    ``state.delegations``. The first match is returned (as its index in
    ``state.delegations``) unless ``require_present`` is set and the
    delegation is not present, in which case the prompt repeats.

    ``string`` overrides the default prompt text.
    """
    prompt = "Enter the delegation's name: " if string is None else string

    while True:
        code = countrycode.countrycode(codes=[input(prompt)],
                                       origin='country_name',
                                       target='iso2c')[0].upper()
        match = next((candidate for candidate in state.delegations
                      if candidate.country_code == code), None)
        if match is None:
            print("Delegation not found, try the country code?")
            continue
        if require_present and not match.present:
            print("Delegation is not present.")
            continue
        return state.delegations.index(match)
Example #10
0
def parse_dataset(data, private=True):
    '''
    Build the resource and metadata dictionaries for one dataset entry.

    Reads the keys 'Dataset Title', 'URL_direct', 'Location', 'Description',
    'URL_summaryPag' and 'Source' from ``data``. ``private`` is copied into
    the metadata unchanged.

    Returns a dict with the keys 'metadata' and 'resource'.

    '''
    #
    #  A check that there is actually metadata to parse
    #  (``data.get('worldPopData') is None`` -> ValueError)
    #  was disabled in the original and is still not performed.
    #
    title = data['Dataset Title']
    location = data['Location']

    # The slug (cut to 90 characters) serves as both the package id and name.
    slug = str(slugify(title))[:90]

    # Group ids have to be lower-case ISO 3-letter codes.
    group_id = countrycode(codes=str(location), origin='country_name', target='iso3c').lower()

    resource = dict([
        ("package_id", slug),
        ("url", data['URL_direct']),
        ("name", location + '.zip'),
        ("format", 'zip'),
        ("description", None),
    ])

    metadata = dict([
        ('name', slug),
        ('title', str(title)),
        ('owner_org', 'worldpop'),
        ('author', 'andytatem'),
        ('author_email', '*****@*****.**'),
        ('maintainer', 'andytatem'),
        ('maintainer_email', '*****@*****.**'),
        ('license_id', 'cc-by-sa'),
        ('dataset_date', None),    # has to be MM/DD/YYYY
        ('subnational', 1),    # has to be 0 or 1. Default 1 for WorldPop.
        ('notes', data['Description']),
        ('caveats', None),
        ('methodology', 'Other'),
        ('methodology_other',
         'For more information about methods, please refer to ' + data['URL_summaryPag']),
        ('dataset_source', data['Source']),
        ('package_creator', 'luiscape'),
        ('private', private),    # has to be True or False
        ('url', None),
        ('state', 'active'),    # always "active".
        ('tags', [{'name': 'Map'}, {'name': 'Population'}]),    # list of { 'name': ... }
        ('groups', [{'id': group_id}]),    # list of { 'id': <iso3> }
    ])

    return {'metadata': metadata, 'resource': resource}
Example #11
0
 def __init__(self, country_code):
     """Set up a delegation for the ISO2 ``country_code``.

     The country name is resolved with ``countrycode``; an ``Exception`` is
     raised if that lookup yields ``None``. Veto power is read from
     ``config['committee']['veto']``. All counters start at zero and the
     delegation starts out as not present.
     """
     self.country_code = country_code
     resolved = countrycode.countrycode(codes=[country_code],
                                        origin='iso2c',
                                        target='country_name')
     self.country = resolved[0]
     if self.country is None:
         raise Exception("Invalid country code '" + country_code + "'")
     self.veto = country_code in config['committee']['veto']

     # Speaking-time accumulators.
     self.speech_time = self.poi_time = self.poi_answer_time = 0
     # Activity counters.
     self.motions_raised = self.pois_raised = self.amendments_made = 0
     # Tally of votes for, against and abstentions (in that order).
     self.votes = [0] * 3
     self.veto_used = 0
     self.present = self.no_abstentions = False
def collect_countries():
    '''
    Collect the countries and their download links from ACLED's website.

    Fetches the version-6 data page, takes the ``<ul>`` element at index 7
    and turns each of its children of length 2 into a dict with the keys
    'name' (link text without ' (xls)'), 'url' (href of the first ``<a>``)
    and 'iso' (lower-cased ISO3 code derived from the name).

    Returns the list of those dicts.

    '''
    page_url = 'http://www.acleddata.com/data/version-6-data-1997-2015/'
    list_index = 7

    response = requests.get(page_url)
    lists = BeautifulSoup(response.content, 'html.parser').findAll('ul')

    countries = []
    for item in lists[list_index]:
        if len(item) != 2:
            continue
        name = item.get_text().replace(' (xls)', '')
        link = item.findAll('a')[0].get('href')
        iso = countrycode(name, 'country_name', 'iso3c').lower()
        countries.append({'name': name, 'url': link, 'iso': iso})

    return countries
Example #13
0
def get_eia_annual_hydro_generation(fn=None):
    """Load the EIA annual hydro generation table.

    ``fn`` is the CSV path; it defaults to
    ``Hydro_Inflow/EIA_hydro_generation_2000_2014.csv`` in the data
    directory. The country-name index is converted to ISO2 codes (with
    'Kosovo' mapped to 'KV') and the table is transposed so that countries
    become the columns.

    Returns a ``pandas.DataFrame`` scaled by 1e6 (source values are in
    billion kWh/a = TWh/a, the result is in MWh/a).
    """
    if fn is None:
        fn = toDataDir('Hydro_Inflow/EIA_hydro_generation_2000_2014.csv')

    raw = pd.read_csv(fn, skiprows=4, index_col=1, na_values=[u' ', '--'])
    generation = raw.drop(['Unnamed: 0', 'Unnamed: 2'], axis=1)
    generation = generation.dropna(how='all')

    iso2_codes = countrycode(generation.index.values,
                             origin='country_name',
                             target='iso2c')
    generation.index = pd.Index(iso2_codes, name='countries')
    # 'Kosovo' is still present as a plain name after the conversion above;
    # give it the code 'KV' explicitly.
    generation = generation.rename(index={'Kosovo': 'KV'})

    # TWh/a -> MWh/a
    return generation.T * 1e6
Example #14
0
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame,
              who_coding: pd.DataFrame):
    """
    Apply transformations to CDC_ITF records.

    The value replacements are listed in tables and applied in order through
    ``utils.replace_conditional(record, field, old, new)``.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """

    def replace_all(rec, field, pairs):
        # Apply each (old, new) replacement to ``field`` in the given order.
        for old, new in pairs:
            rec = utils.replace_conditional(rec, field, old, new)
        return rec

    # Join the comment columns of the raw record before its keys are remapped.
    comments = join_comments(record)

    # Copy the raw data into a fresh blank record via key_ref.
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # Attach the merged comments to the new record.
    record['comments'] = comments

    # If area_covered is national, set to blank.
    record = area_covered_national(record)

    # Handle date formatting.
    record = utils.parse_date(record)

    # Assign date_end with measure_stage value.
    record = add_date_end(record)

    # Manual country name changes.
    # NOTE(review): the 'Curaçao' entry occurs three times in the original
    # call sequence (possibly different encodings of the same name that now
    # look identical) -- kept as-is to preserve behavior.
    record = replace_all(record, 'country_territory_area', (
        ('Saint Martin', 'French Saint Martin'),
        ('Réunion', 'Reunion'),
        ('Curaçao', 'Curacao'),
        ('Curaçao', 'Curacao'),
        ('Curaçao', 'Curacao'),
        ('St. Barts', 'Saint Barthelemy'),
        ('Czechia', 'Czech Republic'),
        ('D. P. R. of Korea', 'North Korea'),
        ('Eswatini', 'Swaziland'),
        ('South Korea', 'Korea'),
        ('Bonaire, Saint Eustatius and Saba', 'Carribean Netherlands'),
    ))

    # Manual measure_stage name changes.
    record = replace_all(record, 'measure_stage', (
        ('Impose', 'new'),
        ('Lift', 'phase-out'),
        ('Pause', 'modification'),
        ('Ease', 'modification'),
        ('Strengthen', 'modification'),
    ))

    # Manual non_compliance_penalty name changes.
    record = replace_all(record, 'non_compliance_penalty', (
        ('Yes', 'not known'),
        ('Yes ', 'not known'),
        ('yes ', 'not known'),
        ('yes', 'not known'),
        ('No', None),
        ("No'", None),
    ))

    # Replace sensitive country names.
    record = utils.replace_sensitive_regions(record)

    # Assign ISO code.
    record['iso'] = countrycode(codes=record['country_territory_area'],
                                origin='country_name',
                                target='iso3c')

    # Check missing ISO.
    check.check_missing_iso(record)

    # Join WHO accepted country names (shared).
    record = utils.assign_who_country_name(record, country_ref)

    # Join WHO coding from lookup (shared).
    record = utils.assign_who_coding(record, who_coding)

    # Check for missing WHO codes (shared).
    check.check_missing_who_code(record)

    # Normalize admin_level values.
    record = replace_all(record, 'admin_level', (
        ('Subnational/regional only', 'other'),
        ('subnational/regional only', 'other'),
        ('National', 'national'),
    ))

    # Replace measure_stage extension.
    record = replace_all(record, 'measure_stage', (
        ('Extend with same stringency', 'extension'),
    ))

    # Add WHO PHSM admin_level values.
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record, ['comments', 'link', 'alt_link'])

    return record
Example #15
0
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame,
              who_coding: pd.DataFrame, prov_measure_filter: pd.DataFrame):
    """
    Apply transformations to JH_HIT records.

    Records whose ``locality`` and ``usa_county`` are both null, and records
    for which ``apply_prov_measure_filter`` returns ``None``, are dropped
    (``None`` is returned).

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.
    prov_measure_filter : pd.DataFrame
        Reference for filtering by `prov_measure` values.

    Returns
    -------
    dict or None
        Record with transformations applied, or None if it was dropped.

    """

    def replace_all(rec, field, pairs):
        # Apply each (old, new) replacement to ``field`` in the given order.
        for old, new in pairs:
            rec = utils.replace_conditional(rec, field, old, new)
        return rec

    # Skip records that carry neither a locality nor a US county.
    if all(pd.isnull(record[key]) for key in ('locality', 'usa_county')):
        return None

    # Copy the raw data into a fresh blank record via key_ref (shared).
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # Filter by prov_measure; a None result means the record is dropped.
    record = apply_prov_measure_filter(record, prov_measure_filter)
    if record is None:
        return None

    # Handle date - infer format (shared).
    record = utils.parse_date(record)

    # Replace sensitive country names by ISO (utils).
    record = utils.replace_sensitive_regions(record)

    # Assign ISO code.
    record['iso'] = countrycode(codes=record['country_territory_area'],
                                origin='country_name',
                                target='iso3c')

    # Check for missing ISO codes (shared).
    check.check_missing_iso(record)

    # Join WHO accepted country names (shared).
    record = utils.assign_who_country_name(record, country_ref)

    # Join WHO coding from lookup (shared).
    record = utils.assign_who_coding(record, who_coding)

    # Check for missing WHO codes (shared).
    check.check_missing_who_code(record)

    # Replace admin_level values.
    record = replace_all(record, 'admin_level', (
        ('', 'unknown'),
        ('Yes', 'national'),
        ('No', 'state'),
    ))

    # Replace JH enforcement == 'unknown' with None.
    record = replace_all(record, 'enforcement', (
        ('unknown', None),
    ))

    # Replace JH targeted values.
    record = replace_all(record, 'targeted', (
        ('geographic subpobulation', None),
        ('entire population', None),
    ))

    # fill_not_enough_to_code
    record = fill_not_enough_to_code(record)

    # Replace unknown non_compliance_penalty.
    record = replace_all(record, 'non_compliance_penalty', (
        ('unknown', 'Not Known'),
    ))

    record = utils.remove_tags(record)

    record = blank_record_and_url(record)

    return record
Example #16
0
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame,
              who_coding: pd.DataFrame):
    """
    Apply transformations to EURO records.

    The value replacements are listed in tables and applied in order through
    ``utils.replace_conditional(record, field, old, new)``.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """

    def replace_all(rec, field, pairs):
        # Apply each (old, new) replacement to ``field`` in the given order.
        for old, new in pairs:
            rec = utils.replace_conditional(rec, field, old, new)
        return rec

    # Copy the raw data into a fresh blank record via key_ref.
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # Manual country name changes.
    record = replace_all(record, 'country_territory_area', (
        ('Kosovo*', 'Kosovo'),
        ('Bewlgium', 'Belgium'),
        ('Luxemburg', 'Luxembourg'),
    ))

    # Replace enforcement values (both numeric and string encodings occur).
    record = replace_all(record, 'enforcement', (
        (' ', 'not known'),
        (0, 'not applicable'),
        ('0', 'not applicable'),
        (1, 'recommended'),
        ('1', 'recommended'),
        ('2', 'required'),
        (2, 'required'),
        (3, 'monitored'),
    ))

    # Replace measure_stage values.
    record = replace_all(record, 'measure_stage', (
        (1, 'new'),
        ('1', 'new'),
        (2, 'modification'),
        ('2', 'modification'),
        (3, 'phase out'),
        ('3', 'phase out'),
    ))

    # Change a who_code value based on measure_stage.
    record = update_school_record(record)

    # Strip whitespace characters from coding.
    for coding_field in ('prov_category', 'prov_subcategory', 'prov_measure'):
        record[coding_field] = record[coding_field].strip()

    # Replace sensitive country names by ISO (utils).
    record = utils.replace_sensitive_regions(record)

    # Assign ISO code.
    record['iso'] = countrycode(codes=record['country_territory_area'],
                                origin='country_name',
                                target='iso3c')

    # Check for missing ISO codes (shared).
    check.check_missing_iso(record)

    # Join WHO accepted country names (shared).
    record = utils.assign_who_country_name(record, country_ref)

    # Join WHO coding from lookup (shared).
    record = utils.assign_who_coding(record, who_coding)

    # Check for missing WHO codes (shared).
    check.check_missing_who_code(record)

    # Add WHO PHSM admin_level values.
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    return record
Example #17
0
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame,
              who_coding: pd.DataFrame):
    """
    Apply transformations to ACAPS records.

    The value replacements are listed in tables and applied in order through
    ``utils.replace_conditional(record, field, old, new)``.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.

    Returns
    -------
    dict
        Record with transformations applied.

    """

    def replace_all(rec, field, pairs):
        # Apply each (old, new) replacement to ``field`` in the given order.
        for old, new in pairs:
            rec = utils.replace_conditional(rec, field, old, new)
        return rec

    # Copy the raw data into a fresh blank record via key_ref.
    record = utils.apply_key_map(utils.generate_blank_record(), record, key_ref)

    # Blank out an area_covered value that is just a single space.
    if record['area_covered'] == ' ':
        record['area_covered'] = ''

    # Shift areas that should be countries.
    record = utils.replace_country(record, 'Denmark', 'Greenland')

    # Manual country name changes.
    record = replace_all(record, 'country_territory_area', (
        ('DRC', 'Democratic Republic of the Congo'),
        ('CAR', 'Central African Republic'),
        ('DPRK', 'North Korea'),
        ('Eswatini', 'Swaziland'),
    ))

    # Manual measure_stage changes.
    record = replace_all(record, 'measure_stage', (
        ('Introduction / extension of measures',
         'introduction / extension of measures'),
        ('Phase-out measure', 'phase-out'),
    ))

    # Manual non_compliance_penalty changes.
    # NOTE(review): the last entry maps 'not applicable ' (trailing space)
    # to 'not known' while the other "applicable" spellings map to
    # 'not applicable' -- possibly unintended; kept to preserve behavior.
    record = replace_all(record, 'non_compliance_penalty', (
        ('Legal Action', 'legal action'),
        ('Legal action', 'legal action'),
        ('Legal', 'legal action'),
        ('Up to detention', 'up to detention'),
        ('Up to Detention', 'up to detention'),
        ('Arrest/Detention', 'arrest/detention'),
        ('Deportation', 'deportation'),
        ('Refusal to enter the country', 'refused entry'),
        ('Refusal to enter the Country', 'refused entry'),
        ('Refusal to Enter the Country', 'refused entry'),
        ('Other (add in comments)', 'not known'),
        ('Fines', 'fines'),
        ('Other', 'not known'),
        ('Not Available', 'not known'),
        ('Not available', 'not known'),
        ('Not available ', 'not known'),
        ('not available ', 'not known'),
        ('Not Applicable', 'not applicable'),
        ('Not applicable', 'not applicable'),
        ('not applicable ', 'not known'),
    ))

    # Replace targeted values.
    record = replace_all(record, 'targeted', (
        ('checked', None),
        ('Checked', None),
        ('general', None),
        ('General', None),
    ))

    # Replace sensitive country names by ISO (utils).
    record = utils.replace_sensitive_regions(record)

    # Assign ISO code.
    record['iso'] = countrycode(codes=record['country_territory_area'],
                                origin='country_name',
                                target='iso3c')

    # Check for missing ISO codes (shared).
    check.check_missing_iso(record)

    # Join WHO accepted country names (shared).
    record = utils.assign_who_country_name(record, country_ref)

    # Join WHO coding from lookup (shared).
    record = utils.assign_who_coding(record, who_coding)

    # Check for missing WHO codes (shared).
    check.check_missing_who_code(record)

    # Add WHO PHSM admin_level values.
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    return record
def parse_dataset(data, private=True, fail_no_country=True):
    """
    Parse a dataset record into a metadata dict and a list of file resources.

    Parameters
    ----------
    data : dict
        Dataset record. Must contain ``"latestVersion"`` with a
        ``"metadataBlocks"`` dict (holding ``"citation"`` and
        ``"geospatial"`` blocks, each with a ``"fields"`` list) and a
        ``"files"`` list.
    private : bool
        Value stored under ``metadata["private"]``.
    fail_no_country : bool
        When True, a ``geographicCoverage`` entry without a ``"country"``
        key raises ``ValueError``; when False such entries are skipped.

    Returns
    -------
    dict
        ``{"metadata": <dict>, "resources": <list of dict>}``. One resource
        is produced per file whose extension is in the accepted list.

    Raises
    ------
    ValueError
        If ``"latestVersion"`` or the ``"geospatial"`` block is missing, or
        if a coverage entry has no country while ``fail_no_country`` is True.
    """
    # Check that there is actually metadata to parse.
    latest = data.get("latestVersion")
    if latest is None:
        raise ValueError("No data to parse.")

    blocks = latest["metadataBlocks"]
    if blocks.get("geospatial") is None:
        raise ValueError("No country entry found.")

    metadata = {
        "name": None,
        "title": None,
        "owner_org": "ifpri",
        "author": "ifpridata",
        "author_email": "*****@*****.**",
        "maintainer": "ifpridata",
        "maintainer_email": "*****@*****.**",
        "license_id": "cc-by-sa",
        "dataset_date": None,  # has to be MM/DD/YYYY
        "subnational": 1,  # has to be 0 or 1. Default 1 for IFPRI.
        "notes": None,
        "caveats": None,
        "data_update_frequency": "0",
        "methodology": "Other",
        "methodology_other": None,
        "dataset_source": "",
        "package_creator": "luiscape",
        "private": private,  # has to be True or False
        "url": None,
        "state": "active",  # always "active".
        "tags": [{"name": "Food"}, {"name": "Security"}],  # has to be a list with { 'name': None }
        "groups": [],  # has to be ISO-3-letter-code. { 'id': None }
    }

    # Citation block: name/title, dataset_date, dataset_source and notes.
    for field in blocks["citation"]["fields"]:
        type_name = field.get("typeName")

        if type_name == "title":
            metadata["title"] = str(field["value"])
            metadata["name"] = str(slugify(field["value"]))[:90]

        elif type_name == "timePeriodCovered":
            # The last entry wins; an entry without a start resets the date to "".
            for period in field["value"]:
                start = period.get("timePeriodCoveredStart")
                metadata["dataset_date"] = "" if start is None else str(start["value"])

        elif type_name == "author":
            names = (author["authorName"].get("value") for author in field["value"])
            metadata["dataset_source"] = ", ".join(n for n in names if n is not None)

        elif type_name == "dsDescription":
            metadata["notes"] = str(field.get("value")[0].get("dsDescriptionValue").get("value"))

    # Geospatial block: one lower-cased ISO3 group per covered country.
    for location in blocks["geospatial"]["fields"]:
        if location.get("typeName") != "geographicCoverage":
            continue
        for coverage in location["value"]:
            country = coverage.get("country")
            if country is None:
                if fail_no_country:
                    raise ValueError("No country entry found.")
                continue
            code = countrycode(codes=str(country.get("value")), origin="country_name", target="iso3c")
            metadata["groups"].append({"id": code.lower()})

    # Files: keep only entries that have a name with an accepted extension.
    resources = []
    desired_file_extensions = ["xls", "xlsx", "csv", "zip", "tsv", "shp", "geojson", "json"]
    for entry in latest["files"]:
        # An entry without "datafile" is treated like one without a name
        # and skipped, instead of failing on None.get(...).
        datafile = entry.get("datafile") or {}
        file_name = datafile.get("name")
        if file_name is None:
            continue

        extension = os.path.splitext(file_name)[1][1:].lower()
        if extension not in desired_file_extensions:
            continue

        resources.append({
            "package_id": metadata["name"],
            "url": "https://dataverse.harvard.edu/api/access/datafile/" + str(datafile.get("id")),
            "name": file_name,
            "format": extension.upper(),
            "description": None,
        })

    return {"metadata": metadata, "resources": resources}
Example #19
0
def transform(record: dict, key_ref: dict, country_ref: pd.DataFrame, who_coding: pd.DataFrame, no_update_phrase: pd.DataFrame):
    """
    Run a single OXCGRT record through the transformation pipeline.

    The steps are applied in a fixed order; two of them may drop the
    record, in which case ``None`` is returned.

    Parameters
    ----------
    record : dict
        Input record.
    key_ref : dict
        Reference for key mapping.
    country_ref : pd.DataFrame
        Reference for WHO accepted country names.
    who_coding : pd.DataFrame
        Reference for WHO coding.
    no_update_phrase : pd.DataFrame
        Reference for "no update" phrases; its ``'phrase'`` column is used.

    Returns
    -------
    dict or None
        The transformed record, or ``None`` when ``prov_measure`` is
        "H8_Protection of elderly people" or ``prov_subcategory`` equals 0.

    """

    # Build a blank record with the target keys, then fill it from the
    # input record using the key reference.
    blank = utils.generate_blank_record()
    record = utils.apply_key_map(blank, record, key_ref)

    # Records for this measure are dropped.
    if record["prov_measure"] == "H8_Protection of elderly people":
        return None

    # Date formatting.
    record = utils.parse_date(record)

    # Sensitive country names.
    record = utils.replace_sensitive_regions(record)

    # Shift areas that should be countries.
    record = utils.replace_country(record, 'United States', 'Virgin Islands')

    # Manual country name changes, applied in this order. Each pair is the
    # third and fourth argument handed to utils.replace_conditional.
    country_name_changes = (
        ('Virgin Islands', 'US Virgin Islands'),
        ('United States Virgin Islands', 'US Virgin Islands'),
        ('Eswatini', 'Swaziland'),
        ('South Korea', 'Korea'),
    )
    for name, substitute in country_name_changes:
        record = utils.replace_conditional(record, 'country_territory_area', name, substitute)

    # ISO code assignment followed by the missing-ISO check.
    record['iso'] = countrycode(codes=record['country_territory_area'], origin='country_name', target='iso3c')
    check.check_missing_iso(record)

    # Records without data in prov_subcategory are dropped.
    if record['prov_subcategory'] == 0:
        return None

    # Removes information in flag variables for now.
    record['prov_subcategory'] = int(record['prov_subcategory'])

    # WHO accepted country names.
    record = utils.assign_who_country_name(record, country_ref)

    record = financial_measures(record)

    # WHO coding lookup followed by the missing-WHO-code check.
    record = utils.assign_who_coding(record, who_coding)
    check.check_missing_who_code(record)

    # WHO PHSM admin_level values.
    record = utils.add_admin_level(record)

    record = utils.remove_tags(record)

    record = assign_comment_links(record)

    # Label the record against the "no update" phrases.
    phrases = list(no_update_phrase['phrase'])
    return label_update_phrase(record, phrases)