Example #1
0
 def _parse_geo(self, rrset):
     try:
         loc = rrset['GeoLocation']
     except KeyError:
         # No geo loc
         return
     try:
         return loc['ContinentCode']
     except KeyError:
         # Must be country
         cc = loc['CountryCode']
         if cc == '*':
             # This is the default
             return
         cn = cca_to_ctca2(cc)
         try:
             return '{}-{}-{}'.format(cn, cc, loc['SubdivisionCode'])
         except KeyError:
             return '{}-{}'.format(cn, cc)
Example #2
0
def lookup(ip):
  """Resolve an IP address to a ``(country_code, continent_code)`` pair.

  The country code comes from ``gi.country_code_by_addr`` and the continent
  code from ``transformations.cca_to_ctca2``. If either step raises, the
  lookup is treated as unknown and ``(None, None)`` is returned.
  """
  try:
    country_code = gi.country_code_by_addr(ip)
    continent = transformations.cca_to_ctca2(country_code)
  except Exception:
    # Best-effort lookup: any failure is reported as "unknown"
    return (None, None)
  # Mirror the failure path's tuple shape so callers can always unpack
  return (country_code, continent)
Example #3
0
    def parse_IEA_countries(self, force_reload=False):
        """Populate the IEA country-code groupings, using a pickle cache.

        Unless ``force_reload`` is true, the groupings are first read from the
        cached pickle file under ``base_tmp_dir``. If that cache cannot be
        loaded (or a reload is forced), the groupings are rebuilt from the
        hard-coded region lists and ``self.IEA_data`` and written back to the
        cache file.

        Sets ``self.iea_specified_country_codes``,
        ``self.iea_country_codes_to_named_region``,
        ``self.middle_eastern_country_codes``,
        ``self.non_OECD_Eurasian_country_codes``,
        ``self.non_OECD_asia_country_codes``, ``self.african_country_codes``
        and ``self.non_oecd_americas_country_codes``.

        :param force_reload: skip the cache and rebuild the groupings
        :return: None
        """
        processed_filename = os.path.join(base_tmp_dir,
                                          'IEA_countries_2017.pkl')
        if not force_reload:
            try:
                # NOTE(review): unpickling runs code embedded in the file, so
                # this is only safe while the cache directory is trusted.
                with open(processed_filename, "rb") as f:
                    (self.iea_specified_country_codes,
                     self.iea_country_codes_to_named_region,
                     self.middle_eastern_country_codes,
                     self.non_OECD_Eurasian_country_codes,
                     self.non_OECD_asia_country_codes,
                     self.african_country_codes,
                     self.non_oecd_americas_country_codes) = pickle.load(f)
            except Exception:
                # Missing, unreadable or incompatible cache: rebuild below
                pass
            else:
                return

        # Sort the IEA data in some not insane way
        middle_eastern_countries = [
            'Bahrain', 'Iran', 'Iraq', 'Jordan', 'Kuwait', 'Lebanon',
            'Oman', 'Qatar', 'Saudi Arabia', 'Syrian Arab Republic',
            'United Arab Emirates', 'Yemen'
        ]
        non_OECD_Eurasian_countries = [
            'Albania', 'Armenia', 'Azerbaijan', 'Belarus',
            'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus',
            'Macedonia', 'Georgia', 'Gibraltar', 'Kazakhstan',
            'Kyrgyzstan', 'Lithuania', 'Malta', 'Moldova', 'Montenegro',
            'Romania', 'Russian Federation', 'Serbia', 'Tajikistan',
            'Turkmenistan', 'Ukraine', 'Uzbekistan'
        ]
        # NOTE(review): the escaped bytes in the Korea entry are the UTF-8
        # encoding of a right single quote; on Python 3 this literal is three
        # separate characters -- confirm the name still resolves.
        # 'Cambodia' and 'Mongolia' are listed twice, so their codes appear
        # twice in the resulting code list.
        non_OECD_asia = [
            'Bangladesh', 'Brunei Darussalam', 'Cambodia',
            'Democratic People\xe2\x80\x99s Republic of Korea', 'India',
            'Indonesia', 'Malaysia', 'Mongolia', 'Myanmar', 'Nepal',
            'Pakistan', 'Philippines', 'Singapore', 'Sri Lanka',
            'Taiwan, province of china', 'Thailand', 'Vietnam',
            'Afghanistan', 'Bhutan', 'Cambodia', 'Cook Islands', 'Fiji',
            'French Polynesia', 'Kiribati',
            "Lao People's Democratic Republic", 'Macao', 'Maldives',
            'Mongolia', 'New Caledonia', 'Palau', 'Papua New Guinea',
            'Samoa', 'Solomon Islands', 'Tonga', 'Vanuatu'
        ]

        self.middle_eastern_country_codes = [
            chf.find_country_by_name(country)
            for country in middle_eastern_countries
        ]
        self.non_OECD_Eurasian_country_codes = [
            chf.find_country_by_name(country)
            for country in non_OECD_Eurasian_countries
        ]
        self.non_OECD_asia_country_codes = [
            chf.find_country_by_name(country) for country in non_OECD_asia
        ]

        # Countries the IEA data names individually; names that cannot be
        # resolved to a code are skipped.
        self.iea_specified_country_codes = {}
        for country in self.IEA_data.Country.unique().tolist():
            try:
                code = chf.find_country_by_name(country)
            except Exception:
                continue
            self.iea_specified_country_codes[code] = country
        self.iea_specified_country_codes[
            u'CHN'] = "People's Republic of China"
        self.iea_specified_country_codes[u'KOR'] = "Korea"
        self.iea_specified_country_codes[u'SVK'] = "Slovak Republic"
        self.iea_specified_country_codes[u'CZE'] = "Czech Republic"

        self.iea_country_codes_to_named_region = copy.deepcopy(
            self.iea_specified_country_codes)
        self.iea_country_codes_to_named_region[
            u'HKG'] = "People's Republic of China"

        # Later regions overwrite earlier ones for a code present in several
        region_groups = (
            ('Middle East', self.middle_eastern_country_codes),
            ('Non-OECD Europe and Eurasia',
             self.non_OECD_Eurasian_country_codes),
            ('Non-OECD Asia', self.non_OECD_asia_country_codes),
        )
        for region, codes in region_groups:
            for code in codes:
                self.iea_country_codes_to_named_region[code] = region

        # Every remaining code is assigned by continent; codes on other
        # continents, or without a continent lookup, stay unassigned.
        self.african_country_codes = []
        self.non_oecd_americas_country_codes = []
        for iso3 in chf.country_iso3_list:
            if iso3 in self.iea_country_codes_to_named_region:
                continue
            try:
                cont_code = transformations.cca_to_ctca2(iso3)
            except Exception:
                continue
            if cont_code == u'AF':
                self.iea_country_codes_to_named_region[iso3] = 'Africa'
                self.african_country_codes.append(iso3)
            elif cont_code in (u'NA', u'SA'):
                self.iea_country_codes_to_named_region[
                    iso3] = 'Non-OECD Americas'
                self.non_oecd_americas_country_codes.append(iso3)

        with open(processed_filename, 'wb') as f:
            data_vars = (self.iea_specified_country_codes,
                         self.iea_country_codes_to_named_region,
                         self.middle_eastern_country_codes,
                         self.non_OECD_Eurasian_country_codes,
                         self.non_OECD_asia_country_codes,
                         self.african_country_codes,
                         self.non_oecd_americas_country_codes)

            pickle.dump(data_vars, f)
Example #4
0
 def _continent(cls, country_code):
     """Return ``(continent_code, continent)`` for ``country_code``.

     The two values come from ``transformations.cca_to_ctca2`` and
     ``transformations.cca_to_ctn`` respectively, called in that order.
     """
     return (
         transformations.cca_to_ctca2(country_code),
         transformations.cca_to_ctn(country_code),
     )
Example #5
0
def migrate_repo(repo):
    """Convert one OpenDOAR repository element into a register record.

    ``repo`` is queried with ``get``/``find`` and its children are iterated,
    so it is presumably an ElementTree-style element (confirm against the
    caller). Optional sections (country, classes, languages, content types,
    policies, contacts) are skipped when the element is absent.

    Returns a ``(record, statistics)`` tuple. ``record`` is a dict with
    ``"register"`` and ``"admin"`` keys; ``statistics`` is a one-element list
    holding the item-count statistics dict.
    """
    # the various components we need to assemble
    opendoar = {}
    metadata = {}
    organisation = {}
    contacts = []
    apis = []
    statistics = {}
    register = {}
    software = {}
    policies = []

    # a record of the patches to be applied to the data (mostly come from the policy data)
    patches = []

    # original opendoar id
    odid = repo.get("rID")
    if odid is not None:
        opendoar["rid"] = odid

    # repository name
    _extract(repo, "rName", metadata, "name", unescape=True)

    # repository acronym
    _extract(repo, "rAcronym", metadata, "acronym", unescape=True)

    # repository url
    _extract(repo, "rUrl", metadata, "url")

    # oai base url
    oai = {"api_type" : "oai-pmh"}
    _extract(repo, "rOaiBaseUrl", oai, "base_url")
    if "base_url" in oai:
        apis.append(oai)

    # organisational details
    _extract(repo, "uName", organisation, "unit", unescape=True)
    _extract(repo, "uAcronym", organisation, "unit_acronym", unescape=True)
    _extract(repo, "uUrl", organisation, "unit_url")
    _extract(repo, "oName", organisation, "name", unescape=True)
    _extract(repo, "oAcronym", organisation, "acronym", unescape=True)
    _extract(repo, "oUrl", organisation, "url")
    _extract(repo, "paLatitude", organisation, "lat", cast=float)
    _extract(repo, "paLongitude", organisation, "lon", cast=float)

    # country details (the whole section is skipped if there is no country element)
    cel = repo.find("country")
    if cel is not None:
        _extract(cel, "cIsoCode", metadata, "country_code", lower=True)
        _extract(cel, "cIsoCode", organisation, "country_code", lower=True)

        isocode = cel.find("cIsoCode")
        country_code = isocode.text if isocode is not None else None
        if country_code:
            try:
                # specify the continent in the metadata
                continent_code = transformations.cca_to_ctca2(country_code)
                metadata["continent_code"] = continent_code.lower()
                continent = transformations.cca_to_ctn(country_code)
                metadata["continent"] = continent

                # normalised country name
                country = pycountry.countries.get(alpha2=country_code.upper()).name
                metadata["country"] = country
                organisation["country"] = country
            except KeyError:
                # unknown country code: leave the continent/country fields unset
                pass

    # repository description
    _extract(repo, "rDescription", metadata, "description", unescape=True)

    # remarks
    _extract(repo, "rRemarks", metadata, "description", unescape=True, append=True, prepend="  ")

    # statistics
    _extract(repo, "rNumOfItems", statistics, "value", cast=int)
    _extract(repo, "rDateHarvested", statistics, "date")

    # established date
    _extract(repo, "rYearEstablished", metadata, "established_date")

    # repository type
    _extract(repo, "repositoryType", metadata, "repository_type", aslist=True)

    # operational status
    _extract(repo, "operationalStatus", register, "operational_status")

    # software
    _extract(repo, "rSoftWareName", software, "name", unescape=True)
    _extract(repo, "rSoftWareVersion", software, "version")

    # subject classifications
    classes = repo.find("classes")
    if classes is not None:
        metadata["subject"] = []
        for c in classes:
            subject = {}
            _extract(c, "clCode", subject, "code")
            _extract(c, "clTitle", subject, "term", unescape=True)
            metadata["subject"].append(subject.get("term")) # FIXME: a bit of a round trip here, but will suffice

    # languages
    langs = repo.find("languages")
    if langs is not None:
        metadata["language_code"] = []
        metadata["language"] = []
        for lang_el in langs:
            iso = lang_el.find("lIsoCode")
            # skip entries whose code element is missing or has no text
            if iso is None or not iso.text:
                continue
            lc = iso.text.lower()
            lang = pycountry.languages.get(alpha2=lc).name
            metadata["language_code"].append(lc)
            metadata["language"].append(lang)

    # content types
    ctel = repo.find("contentTypes")
    if ctel is not None:
        metadata["content_type"] = [ct.text for ct in ctel]

    # policies
    polel = repo.find("policies")
    policy_elements = [] if polel is None else polel
    for p in policy_elements:
        policy = {}
        _extract(p, "policyType", policy, "policy_type")
        posel = p.find("poStandard")
        if posel is not None:
            policy["terms"] = []
            for item in posel:
                if item.text is None:
                    # an empty term element carries nothing to map
                    continue
                t = item.text.strip()

                # only keep terms which have mappings in the policy map
                mapped = policy_map.get(t)
                if mapped is not None:
                    policy["terms"].append(mapped)

                # look for any special instructions on the term
                patch = instruction_map.get(t)
                if patch is not None:
                    patches.append(patch)

        # policies that end up with no mapped terms are dropped
        if policy.get("terms"):
            policies.append(policy)

    # contacts
    conel = repo.find("contacts")
    contact_elements = [] if conel is None else conel
    for contact in contact_elements:
        cont_details = {}
        _extract(contact, "pName", cont_details, "name", unescape=True)
        _extract(contact, "pJobTitle", cont_details, "job_title", unescape=True)
        _extract(contact, "pEmail", cont_details, "email")
        _extract(contact, "pPhone", cont_details, "phone")

        phone_el = contact.find("pPhone")
        has_phone = phone_el is not None and phone_el.text is not None

        # add the top level repo data about address and phone
        _extract(repo, "postalAddress", cont_details, "address", unescape=True)
        if not has_phone:
            _extract(repo, "paPhone", cont_details, "phone")
        _extract(repo, "paFax", cont_details, "fax")

        # we also add the top level stuff about lat/lon
        for coord in ("lat", "lon"):
            if organisation.get(coord) is not None:
                cont_details[coord] = organisation[coord]

        # record the job title as the contact role for the time being
        full_record = {"details" : cont_details}
        _extract(contact, "pJobTitle", full_record, "role", unescape=True, aslist=True)

        contacts.append(full_record)

    # now assemble the object
    register["metadata"] = [
        {
            "lang" : "en",
            "default" : True,
            "record" : metadata
        }
    ]

    # optional sections are only included when they have content
    if software:
        register["software"] = [software]
    if contacts:
        register["contact"] = contacts
    if organisation:
        register["organisation"] = [{"details" : organisation, "role" : ["host"]}] # add a default role
    if policies:
        register["policy"] = policies
    if apis:
        register["api"] = apis

    # final few opendoar admin values
    opendoar["in_opendoar"] = True
    # the trailing "Z" marks the timestamp as UTC, so take the time in UTC
    opendoar["last_saved"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

    record = {
        "register" : register,
        "admin" : {
            "opendoar" : opendoar
        }
    }

    statistics["third_party"] = "opendoar"
    statistics["type"] = "item_count"

    # apply any additional field patches
    # each patch is a "||"-separated list of "dotted.field:value" segments
    for patch in patches:
        for segment in patch.split("||"):
            parts = segment.split(":", 1)
            field = parts[0]
            try:
                value = json.loads(parts[1])
            except ValueError:
                # not valid JSON: use the raw text as the value
                value = parts[1]
            stack = field.split(".")
            _apply(record, stack, 0, value)

    return record, [statistics]
Example #6
0
def migrate_repo(repo):
    """Convert one OpenDOAR repository element into a register record.

    ``repo`` is queried with ``get``/``find`` and its children are iterated,
    so it is presumably an ElementTree-style element (confirm against the
    caller). A missing country, policies or contacts element is treated as
    empty. The software, contact, organisation, policy and api sections are
    always present in the result, even when empty.

    Returns a ``(record, statistics)`` tuple. ``record`` is a dict with
    ``"register"`` and ``"admin"`` keys; ``statistics`` is a one-element list
    holding the item-count statistics dict.
    """
    # the various components we need to assemble
    opendoar = {}
    metadata = {}
    organisation = {}
    contacts = []
    apis = []
    statistics = {}
    register = {}
    software = {}
    policies = []

    # original opendoar id
    odid = repo.get("rID")
    if odid is not None:
        opendoar["rid"] = odid

    # repository name
    _extract(repo, "rName", metadata, "name", unescape=True)

    # repository acronym
    _extract(repo, "rAcronym", metadata, "acronym", unescape=True)

    # repository url
    _extract(repo, "rUrl", metadata, "url")

    # oai base url
    oai = {"api_type" : "oai-pmh"}
    _extract(repo, "rOaiBaseUrl", oai, "base_url")
    if "base_url" in oai:
        apis.append(oai)

    # organisational details
    _extract(repo, "uName", organisation, "unit", unescape=True)
    _extract(repo, "uAcronym", organisation, "unit_acronym", unescape=True)
    _extract(repo, "uUrl", organisation, "unit_url")
    _extract(repo, "oName", organisation, "name", unescape=True)
    _extract(repo, "oAcronym", organisation, "acronym", unescape=True)
    _extract(repo, "oUrl", organisation, "url")
    _extract(repo, "paLatitude", organisation, "lat", cast=float)
    _extract(repo, "paLongitude", organisation, "lon", cast=float)

    # country details (the whole section is skipped if there is no country element)
    cel = repo.find("country")
    if cel is not None:
        _extract(cel, "cIsoCode", metadata, "country_code", lower=True)
        _extract(cel, "cIsoCode", organisation, "country_code", lower=True)

        isocode = cel.find("cIsoCode")
        country_code = isocode.text if isocode is not None else None
        if country_code:
            try:
                # specify the continent in the metadata
                continent_code = transformations.cca_to_ctca2(country_code)
                metadata["continent_code"] = continent_code.lower()
                continent = transformations.cca_to_ctn(country_code)
                metadata["continent"] = continent

                # normalised country name
                country = pycountry.countries.get(alpha2=country_code.upper()).name
                metadata["country"] = country
                organisation["country"] = country
            except KeyError:
                # unknown country code: leave the continent/country fields unset
                pass

    # repository description
    _extract(repo, "rDescription", metadata, "description", unescape=True)

    # remarks
    _extract(repo, "rRemarks", metadata, "description", unescape=True, append=True, prepend="  ")

    # statistics
    _extract(repo, "rNumOfItems", statistics, "value", cast=int)
    _extract(repo, "rDateHarvested", statistics, "date")

    # established date
    _extract(repo, "rYearEstablished", metadata, "established_date")

    # repository type
    _extract(repo, "repositoryType", metadata, "repository_type", aslist=True)

    # operational status
    _extract(repo, "operationalStatus", register, "operational_status")

    # software
    _extract(repo, "rSoftWareName", software, "name", unescape=True)
    _extract(repo, "rSoftWareVersion", software, "version")

    # subject classifications
    classes = repo.find("classes")
    if classes is not None:
        metadata["subject"] = []
        for c in classes:
            subject = {}
            _extract(c, "clCode", subject, "code")
            _extract(c, "clTitle", subject, "term", unescape=True)
            metadata["subject"].append(subject)

    # languages
    langs = repo.find("languages")
    if langs is not None:
        metadata["language_code"] = []
        metadata["language"] = []
        for lang_el in langs:
            iso = lang_el.find("lIsoCode")
            # skip entries whose code element is missing or has no text
            if iso is None or not iso.text:
                continue
            lc = iso.text.lower()
            lang = pycountry.languages.get(alpha2=lc).name
            metadata["language_code"].append(lc)
            metadata["language"].append(lang)

    # content types
    ctel = repo.find("contentTypes")
    if ctel is not None:
        metadata["content_type"] = [ct.text for ct in ctel]

    # policies
    polel = repo.find("policies")
    policy_elements = [] if polel is None else polel
    for p in policy_elements:
        policy = {}
        _extract(p, "policyType", policy, "policy_type")
        _extract(p, "policyGrade", policy, "policy_grade")
        posel = p.find("poStandard")
        if posel is not None:
            policy["terms"] = [item.text for item in posel]
        policies.append(policy)

    # contacts
    conel = repo.find("contacts")
    contact_elements = [] if conel is None else conel
    for contact in contact_elements:
        cont_details = {}
        _extract(contact, "pName", cont_details, "name", unescape=True)
        _extract(contact, "pJobTitle", cont_details, "job_title", unescape=True)
        _extract(contact, "pEmail", cont_details, "email")
        _extract(contact, "pPhone", cont_details, "phone")

        phone_el = contact.find("pPhone")
        has_phone = phone_el is not None and phone_el.text is not None

        # add the top level repo data about address and phone
        _extract(repo, "postalAddress", cont_details, "address", unescape=True)
        if not has_phone:
            _extract(repo, "paPhone", cont_details, "phone")
        _extract(repo, "paFax", cont_details, "fax")

        # we also add the top level stuff about lat/lon
        for coord in ("lat", "lon"):
            if organisation.get(coord) is not None:
                cont_details[coord] = organisation[coord]

        # record the job title as the contact role for the time being
        full_record = {"details" : cont_details}
        _extract(contact, "pJobTitle", full_record, "role", unescape=True, aslist=True)

        contacts.append(full_record)

    # now assemble the object
    register["metadata"] = [
        {
            "lang" : "en",
            "default" : True,
            "record" : metadata
        }
    ]
    register["software"] = [software]
    register["contact"] = contacts
    register["organisation"] = [{"details" : organisation, "role" : ["host"]}] # add a default role
    register["policy"] = policies
    register["api"] = apis

    opendoar["in_opendoar"] = True

    record = {
        "register" : register,
        "admin" : {
            "opendoar" : opendoar
        }
    }

    statistics["third_party"] = "opendoar"
    statistics["type"] = "item_count"

    return record, [statistics]
Example #7
0
 def detect(self, register, info):
     """Set the register's continent from its country code and log the result.

     Reads ``register.country_code``, looks up the continent code and name
     through ``transformations``, and stores both with
     ``register.set_continent``. ``info`` is not used here.
     """
     country = register.country_code
     ctn_code = transformations.cca_to_ctca2(country)
     ctn_name = transformations.cca_to_ctn(country)
     register.set_continent(name=ctn_name, code=ctn_code)

     message = "Determined continent from country: " + country + " -> " + ctn_name
     log.info(message)