def _parse_geo(self, rrset):
    """Derive a geo code string from a record set's ``GeoLocation`` entry.

    Returns ``None`` when ``rrset`` has no ``GeoLocation`` key, or when the
    location's ``CountryCode`` is ``'*'``. Returns the ``ContinentCode``
    value unchanged when the location has one. Otherwise the result is
    ``CONTINENT-COUNTRY`` or, when a ``SubdivisionCode`` is present,
    ``CONTINENT-COUNTRY-SUBDIVISION``, where the continent part is whatever
    ``cca_to_ctca2`` returns for the country code.
    """
    try:
        geo = rrset['GeoLocation']
    except KeyError:
        # Record set carries no geo location at all.
        return None

    try:
        return geo['ContinentCode']
    except KeyError:
        # No continent code, so this has to be a country-level entry.
        country = geo['CountryCode']

    if country == '*':
        # The wildcard country marks the default record.
        return None

    continent = cca_to_ctca2(country)
    try:
        subdivision = geo['SubdivisionCode']
    except KeyError:
        return '{}-{}'.format(continent, country)
    return '{}-{}-{}'.format(continent, country, subdivision)
def lookup(ip):
    """Resolve an IP address to a country code and a continent code.

    The country code comes from ``gi.country_code_by_addr(ip)`` and the
    continent code from ``transformations.cca_to_ctca2`` applied to that
    country code.

    Args:
        ip: the address handed to ``gi.country_code_by_addr``.

    Returns:
        ``(country_code, continent)`` when both lookups succeed, or
        ``(None, None)`` if either of them raises.
    """
    try:
        country_code = gi.country_code_by_addr(ip)
        continent = transformations.cca_to_ctca2(country_code)
    except Exception:
        # Best-effort lookup: any failure (unknown address, unmapped
        # country, ...) is reported as "nothing found" rather than raised.
        return (None, None)
    # NOTE(review): the original fell off the end here and returned a bare
    # None, discarding both values. The tuple order mirrors the order in
    # which the values are computed -- confirm against callers.
    return (country_code, continent)
def parse_IEA_countries(self, force_reload=False):
    """Load or rebuild the IEA country-code groupings stored on ``self``.

    The groupings are cached as a pickle named ``IEA_countries_2017.pkl``
    under ``base_tmp_dir``. When the cache can be read (and ``force_reload``
    is false) its contents are assigned and nothing else happens. Otherwise
    the groupings are rebuilt from the hard-coded name lists below,
    ``self.IEA_data.Country`` and ``chf.country_iso3_list``, and the cache
    file is rewritten.

    Attributes set on ``self``:
        iea_specified_country_codes: dict mapping a country code to the
            country name used in ``self.IEA_data``.
        iea_country_codes_to_named_region: dict mapping a country code to
            either its own IEA name or an aggregate region name.
        middle_eastern_country_codes, non_OECD_Eurasian_country_codes,
        non_OECD_asia_country_codes: lists of codes returned by
            ``chf.find_country_by_name`` for the name lists below.
        african_country_codes, non_oecd_americas_country_codes: lists of
            codes from ``chf.country_iso3_list`` assigned by continent.

    Args:
        force_reload: when true, ignore any existing cache and rebuild.
    """
    processed_filename = os.path.join(base_tmp_dir,
                                      'IEA_countries_2017.pkl')

    if not force_reload:
        try:
            # NOTE: pickle.load can execute arbitrary code; this is only
            # acceptable if the cache file is one this method wrote itself.
            with open(processed_filename, "rb") as f:
                (self.iea_specified_country_codes,
                 self.iea_country_codes_to_named_region,
                 self.middle_eastern_country_codes,
                 self.non_OECD_Eurasian_country_codes,
                 self.non_OECD_asia_country_codes,
                 self.african_country_codes,
                 self.non_oecd_americas_country_codes) = pickle.load(f)
        except Exception:
            # Cache missing, unreadable or of the wrong shape: fall through
            # and rebuild it. (KeyboardInterrupt/SystemExit now propagate.)
            pass
        else:
            return

    # Sort the IEA data in some not insane way
    middle_eastern_countries = [
        'Bahrain', 'Iran', 'Iraq', 'Jordan', 'Kuwait', 'Lebanon', 'Oman',
        'Qatar', 'Saudi Arabia', 'Syrian Arab Republic',
        'United Arab Emirates', 'Yemen'
    ]
    non_OECD_Eurasian_countries = [
        'Albania', 'Armenia', 'Azerbaijan', 'Belarus',
        'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus',
        'Macedonia', 'Georgia', 'Gibraltar', 'Kazakhstan', 'Kyrgyzstan',
        'Lithuania', 'Malta', 'Moldova', 'Montenegro', 'Romania',
        'Russian Federation', 'Serbia', 'Tajikistan', 'Turkmenistan',
        'Ukraine', 'Uzbekistan'
    ]
    # NOTE(review): the escaped bytes in the Korea entry are a UTF-8 encoded
    # right single quote; on Python 3 this literal is three separate
    # characters -- confirm it still matches in chf.find_country_by_name.
    # 'Cambodia' and 'Mongolia' appear twice, so their codes do too.
    non_OECD_asia = [
        'Bangladesh', 'Brunei Darussalam', 'Cambodia',
        'Democratic People\xe2\x80\x99s Republic of Korea', 'India',
        'Indonesia', 'Malaysia', 'Mongolia', 'Myanmar', 'Nepal',
        'Pakistan', 'Philippines', 'Singapore', 'Sri Lanka',
        'Taiwan, province of china', 'Thailand', 'Vietnam', 'Afghanistan',
        'Bhutan', 'Cambodia', 'Cook Islands', 'Fiji', 'French Polynesia',
        'Kiribati', "Lao People's Democratic Republic", 'Macao',
        'Maldives', 'Mongolia', 'New Caledonia', 'Palau',
        'Papua New Guinea', 'Samoa', 'Solomon Islands', 'Tonga', 'Vanuatu'
    ]

    self.middle_eastern_country_codes = [
        chf.find_country_by_name(country)
        for country in middle_eastern_countries
    ]
    self.non_OECD_Eurasian_country_codes = [
        chf.find_country_by_name(country)
        for country in non_OECD_Eurasian_countries
    ]
    self.non_OECD_asia_country_codes = [
        chf.find_country_by_name(country) for country in non_OECD_asia
    ]

    # Countries the IEA data names individually; names that cannot be
    # resolved to a code (presumably aggregates) are skipped.
    self.iea_specified_country_codes = {}
    for country in self.IEA_data.Country.unique().tolist():
        try:
            code = chf.find_country_by_name(country)
            self.iea_specified_country_codes[code] = country
        except Exception:
            pass
    # Explicit overrides for the names used in the IEA data.
    self.iea_specified_country_codes[u'CHN'] = "People's Republic of China"
    self.iea_specified_country_codes[u'KOR'] = "Korea"
    self.iea_specified_country_codes[u'SVK'] = "Slovak Republic"
    self.iea_specified_country_codes[u'CZE'] = "Czech Republic"

    # Start from the individually named countries, then layer the
    # aggregate regions on top (later updates win on overlap).
    self.iea_country_codes_to_named_region = copy.deepcopy(
        self.iea_specified_country_codes)
    self.iea_country_codes_to_named_region[
        u'HKG'] = "People's Republic of China"
    self.iea_country_codes_to_named_region.update({
        code: 'Middle East'
        for code in self.middle_eastern_country_codes
    })
    self.iea_country_codes_to_named_region.update({
        code: 'Non-OECD Europe and Eurasia'
        for code in self.non_OECD_Eurasian_country_codes
    })
    self.iea_country_codes_to_named_region.update({
        code: 'Non-OECD Asia'
        for code in self.non_OECD_asia_country_codes
    })

    # Everything still unassigned is bucketed by continent.
    self.african_country_codes = []
    self.non_oecd_americas_country_codes = []
    for iso3 in chf.country_iso3_list:
        if iso3 in self.iea_country_codes_to_named_region:
            continue
        try:
            cont_code = transformations.cca_to_ctca2(iso3)
            if cont_code == u'AF':
                self.iea_country_codes_to_named_region[iso3] = 'Africa'
                self.african_country_codes.append(iso3)
            elif cont_code in (u'NA', u'SA'):
                self.iea_country_codes_to_named_region[
                    iso3] = 'Non-OECD Americas'
                self.non_oecd_americas_country_codes.append(iso3)
        except Exception:
            # Codes without a continent mapping are left unassigned.
            pass

    data_vars = (self.iea_specified_country_codes,
                 self.iea_country_codes_to_named_region,
                 self.middle_eastern_country_codes,
                 self.non_OECD_Eurasian_country_codes,
                 self.non_OECD_asia_country_codes,
                 self.african_country_codes,
                 self.non_oecd_americas_country_codes)
    with open(processed_filename, 'wb') as f:
        pickle.dump(data_vars, f)
def _continent(cls, country_code):
    """Return ``(continent_code, continent)`` for a country code.

    The first element is the result of ``transformations.cca_to_ctca2`` and
    the second the result of ``transformations.cca_to_ctn``, both called
    with ``country_code``. Exceptions raised by either call propagate.
    """
    # Tuple elements are evaluated left to right, so the continent code is
    # still looked up before the continent name.
    return (
        transformations.cca_to_ctca2(country_code),
        transformations.cca_to_ctn(country_code),
    )
def migrate_repo(repo):
    """Convert one OpenDOAR repository element into a register record.

    ``repo`` is an element-like object (it is used through ``.get``,
    ``.find``, iteration over children and ``.text``; presumably an
    ElementTree/lxml element -- confirm against the caller). Fields are
    copied out with the ``_extract`` helper, policy terms are translated
    through ``policy_map``, and any per-term instructions found in
    ``instruction_map`` are applied to the finished record via ``_apply``.

    Optional sections (``country``, ``classes``, ``languages``,
    ``contentTypes``, ``policies``, ``contacts``) are skipped when absent.

    Returns:
        A tuple ``(record, [statistics])`` where ``record`` has the shape
        ``{"register": ..., "admin": {"opendoar": ...}}`` and ``statistics``
        is a single item-count dict tagged with third party ``"opendoar"``.
    """
    # the various components we need to assemble
    opendoar = {}
    metadata = {}
    organisation = {}
    contacts = []
    apis = []
    statistics = {}
    register = {}
    software = {}
    policies = []

    # a record of the patches to be applied to the data (mostly come from
    # the policy data)
    patches = []

    # original opendoar id
    odid = repo.get("rID")
    if odid is not None:
        opendoar["rid"] = odid

    # repository name
    _extract(repo, "rName", metadata, "name", unescape=True)

    # repository acronym
    _extract(repo, "rAcronym", metadata, "acronym", unescape=True)

    # repository url
    _extract(repo, "rUrl", metadata, "url")

    # oai base url
    oai = {"api_type" : "oai-pmh"}
    _extract(repo, "rOaiBaseUrl", oai, "base_url")
    if "base_url" in oai:
        apis.append(oai)

    # organisational details
    _extract(repo, "uName", organisation, "unit", unescape=True)
    _extract(repo, "uAcronym", organisation, "unit_acronym", unescape=True)
    _extract(repo, "uUrl", organisation, "unit_url")
    _extract(repo, "oName", organisation, "name", unescape=True)
    _extract(repo, "oAcronym", organisation, "acronym", unescape=True)
    _extract(repo, "oUrl", organisation, "url")
    _extract(repo, "paLatitude", organisation, "lat", cast=float)
    _extract(repo, "paLongitude", organisation, "lon", cast=float)

    # country (guarded: a record without a <country> element used to crash)
    cel = repo.find("country")
    if cel is not None:
        _extract(cel, "cIsoCode", metadata, "country_code", lower=True)
        _extract(cel, "cIsoCode", organisation, "country_code", lower=True)

        isocode = cel.find("cIsoCode")
        code = isocode.text if isocode is not None else None
        if code:
            try:
                # specify the continent in the metadata
                continent_code = transformations.cca_to_ctca2(code)
                metadata["continent_code"] = continent_code.lower()
                continent = transformations.cca_to_ctn(code)
                metadata["continent"] = continent

                # normalised country name
                country = pycountry.countries.get(alpha2=code.upper()).name
                metadata["country"] = country
                organisation["country"] = country
            except KeyError:
                # unknown country code: leave continent/country unset
                pass

    # repository description
    _extract(repo, "rDescription", metadata, "description", unescape=True)

    # remarks (appended to the description)
    _extract(repo, "rRemarks", metadata, "description", unescape=True,
             append=True, prepend=" ")

    # statistics
    _extract(repo, "rNumOfItems", statistics, "value", cast=int)
    _extract(repo, "rDateHarvested", statistics, "date")

    # established date
    _extract(repo, "rYearEstablished", metadata, "established_date")

    # repository type
    _extract(repo, "repositoryType", metadata, "repository_type",
             aslist=True)

    # operational status
    _extract(repo, "operationalStatus", register, "operational_status")

    # software
    _extract(repo, "rSoftWareName", software, "name", unescape=True)
    _extract(repo, "rSoftWareVersion", software, "version")

    # subject classifications
    classes = repo.find("classes")
    if classes is not None:
        metadata["subject"] = []
        for c in classes:
            subject = {}
            _extract(c, "clCode", subject, "code")
            _extract(c, "clTitle", subject, "term", unescape=True)
            # FIXME: a bit of a round trip here, but will suffice
            metadata["subject"].append(subject.get("term"))

    # languages
    langs = repo.find("languages")
    if langs is not None:
        metadata["language_code"] = []
        metadata["language"] = []
        for lang_el in langs:
            code = lang_el.find("lIsoCode")
            # truthiness also skips an empty element whose text is None
            if code is not None and code.text:
                lc = code.text.lower()
                lang = pycountry.languages.get(alpha2=lc).name
                metadata["language_code"].append(lc)
                metadata["language"].append(lang)

    # content types
    ctel = repo.find("contentTypes")
    if ctel is not None:
        metadata["content_type"] = [ct.text for ct in ctel]

    # policies
    polel = repo.find("policies")
    if polel is not None:
        for p in polel:
            policy = {}
            _extract(p, "policyType", policy, "policy_type")
            posel = p.find("poStandard")
            if posel is not None:
                policy["terms"] = []
                for item in posel:
                    t = (item.text or "").strip()

                    # only keep terms which have mappings in the policy map
                    mapped = policy_map.get(t)
                    if mapped is not None:
                        policy["terms"].append(mapped)

                    # look for any special instructions on the term
                    patch = instruction_map.get(t)
                    if patch is not None:
                        patches.append(patch)
            # policies that end up with no mapped terms are dropped
            if policy.get("terms"):
                policies.append(policy)

    # contacts
    conel = repo.find("contacts")
    if conel is not None:
        for contact in conel:
            cont_details = {}
            _extract(contact, "pName", cont_details, "name", unescape=True)
            _extract(contact, "pJobTitle", cont_details, "job_title",
                     unescape=True)
            _extract(contact, "pEmail", cont_details, "email")
            _extract(contact, "pPhone", cont_details, "phone")
            phone_el = contact.find("pPhone")
            has_phone = phone_el is not None and phone_el.text is not None

            # add the top level repo data about address and phone
            _extract(repo, "postalAddress", cont_details, "address",
                     unescape=True)
            if not has_phone:
                _extract(repo, "paPhone", cont_details, "phone")
            _extract(repo, "paFax", cont_details, "fax")

            # we also add the top level stuff about lat/lon
            if organisation.get("lat") is not None:
                cont_details["lat"] = organisation.get("lat")
            if organisation.get("lon") is not None:
                cont_details["lon"] = organisation.get("lon")

            # record the job title as the contact role for the time being
            full_record = {"details" : cont_details}
            _extract(contact, "pJobTitle", full_record, "role",
                     unescape=True, aslist=True)

            contacts.append(full_record)

    # now assemble the object
    register["metadata"] = [
        {
            "lang" : "en",
            "default" : True,
            "record" : metadata
        }
    ]
    if software:
        register["software"] = [software]
    if contacts:
        register["contact"] = contacts
    if organisation:
        # add a default role
        register["organisation"] = [
            {"details" : organisation, "role" : ["host"]}
        ]
    if policies:
        register["policy"] = policies
    if apis:
        register["api"] = apis

    # final few opendoar admin values
    opendoar["in_opendoar"] = True
    # the trailing "Z" denotes UTC, so the timestamp must be taken in UTC
    # rather than local time
    opendoar["last_saved"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

    record = {
        "register" : register,
        "admin" : {
            "opendoar" : opendoar
        }
    }

    statistics["third_party"] = "opendoar"
    statistics["type"] = "item_count"

    # apply any additional field patches; each patch is a "||"-separated
    # list of "dotted.field:value" segments, the value being JSON if it
    # parses and the raw string otherwise
    for patch in patches:
        for segment in patch.split("||"):
            parts = segment.split(":", 1)
            field = parts[0]
            try:
                value = json.loads(parts[1])
            except ValueError:
                value = parts[1]
            stack = field.split(".")
            _apply(record, stack, 0, value)

    return record, [statistics]
def migrate_repo(repo):
    """Convert one OpenDOAR repository element into a register record.

    ``repo`` is an element-like object (it is used through ``.get``,
    ``.find``, iteration over children and ``.text``; presumably an
    ElementTree/lxml element -- confirm against the caller). Fields are
    copied out with the ``_extract`` helper; policy terms are kept as the
    raw text of each ``poStandard`` child.

    Optional sections (``country``, ``classes``, ``languages``,
    ``contentTypes``, ``policies``, ``contacts``) are skipped when absent.
    The ``software``, ``contact``, ``organisation``, ``policy`` and ``api``
    keys are always present in the register, even when empty.

    Returns:
        A tuple ``(record, [statistics])`` where ``record`` has the shape
        ``{"register": ..., "admin": {"opendoar": ...}}`` and ``statistics``
        is a single item-count dict tagged with third party ``"opendoar"``.
    """
    # the various components we need to assemble
    opendoar = {}
    metadata = {}
    organisation = {}
    contacts = []
    apis = []
    statistics = {}
    register = {}
    software = {}
    policies = []

    # original opendoar id
    odid = repo.get("rID")
    if odid is not None:
        opendoar["rid"] = odid

    # repository name
    _extract(repo, "rName", metadata, "name", unescape=True)

    # repository acronym
    _extract(repo, "rAcronym", metadata, "acronym", unescape=True)

    # repository url
    _extract(repo, "rUrl", metadata, "url")

    # oai base url
    oai = {"api_type" : "oai-pmh"}
    _extract(repo, "rOaiBaseUrl", oai, "base_url")
    if "base_url" in oai:
        apis.append(oai)

    # organisational details
    _extract(repo, "uName", organisation, "unit", unescape=True)
    _extract(repo, "uAcronym", organisation, "unit_acronym", unescape=True)
    _extract(repo, "uUrl", organisation, "unit_url")
    _extract(repo, "oName", organisation, "name", unescape=True)
    _extract(repo, "oAcronym", organisation, "acronym", unescape=True)
    _extract(repo, "oUrl", organisation, "url")
    _extract(repo, "paLatitude", organisation, "lat", cast=float)
    _extract(repo, "paLongitude", organisation, "lon", cast=float)

    # country (guarded: a record without a <country> element used to crash)
    cel = repo.find("country")
    if cel is not None:
        _extract(cel, "cIsoCode", metadata, "country_code", lower=True)
        _extract(cel, "cIsoCode", organisation, "country_code", lower=True)

        isocode = cel.find("cIsoCode")
        code = isocode.text if isocode is not None else None
        if code:
            try:
                # specify the continent in the metadata
                continent_code = transformations.cca_to_ctca2(code)
                metadata["continent_code"] = continent_code.lower()
                continent = transformations.cca_to_ctn(code)
                metadata["continent"] = continent

                # normalised country name
                country = pycountry.countries.get(alpha2=code.upper()).name
                metadata["country"] = country
                organisation["country"] = country
            except KeyError:
                # unknown country code: leave continent/country unset
                pass

    # repository description
    _extract(repo, "rDescription", metadata, "description", unescape=True)

    # remarks (appended to the description)
    _extract(repo, "rRemarks", metadata, "description", unescape=True,
             append=True, prepend=" ")

    # statistics
    _extract(repo, "rNumOfItems", statistics, "value", cast=int)
    _extract(repo, "rDateHarvested", statistics, "date")

    # established date
    _extract(repo, "rYearEstablished", metadata, "established_date")

    # repository type
    _extract(repo, "repositoryType", metadata, "repository_type",
             aslist=True)

    # operational status
    _extract(repo, "operationalStatus", register, "operational_status")

    # software
    _extract(repo, "rSoftWareName", software, "name", unescape=True)
    _extract(repo, "rSoftWareVersion", software, "version")

    # subject classifications
    classes = repo.find("classes")
    if classes is not None:
        metadata["subject"] = []
        for c in classes:
            subject = {}
            _extract(c, "clCode", subject, "code")
            _extract(c, "clTitle", subject, "term", unescape=True)
            metadata["subject"].append(subject)

    # languages
    langs = repo.find("languages")
    if langs is not None:
        metadata["language_code"] = []
        metadata["language"] = []
        for lang_el in langs:
            code = lang_el.find("lIsoCode")
            # truthiness also skips an empty element whose text is None
            if code is not None and code.text:
                lc = code.text.lower()
                lang = pycountry.languages.get(alpha2=lc).name
                metadata["language_code"].append(lc)
                metadata["language"].append(lang)

    # content types
    ctel = repo.find("contentTypes")
    if ctel is not None:
        metadata["content_type"] = [ct.text for ct in ctel]

    # policies
    polel = repo.find("policies")
    if polel is not None:
        for p in polel:
            policy = {}
            _extract(p, "policyType", policy, "policy_type")
            _extract(p, "policyGrade", policy, "policy_grade")
            posel = p.find("poStandard")
            if posel is not None:
                policy["terms"] = [item.text for item in posel]
            policies.append(policy)

    # contacts
    conel = repo.find("contacts")
    if conel is not None:
        for contact in conel:
            cont_details = {}
            _extract(contact, "pName", cont_details, "name", unescape=True)
            _extract(contact, "pJobTitle", cont_details, "job_title",
                     unescape=True)
            _extract(contact, "pEmail", cont_details, "email")
            _extract(contact, "pPhone", cont_details, "phone")
            phone_el = contact.find("pPhone")
            has_phone = phone_el is not None and phone_el.text is not None

            # add the top level repo data about address and phone
            _extract(repo, "postalAddress", cont_details, "address",
                     unescape=True)
            if not has_phone:
                _extract(repo, "paPhone", cont_details, "phone")
            _extract(repo, "paFax", cont_details, "fax")

            # we also add the top level stuff about lat/lon
            if organisation.get("lat") is not None:
                cont_details["lat"] = organisation.get("lat")
            if organisation.get("lon") is not None:
                cont_details["lon"] = organisation.get("lon")

            # record the job title as the contact role for the time being
            full_record = {"details" : cont_details}
            _extract(contact, "pJobTitle", full_record, "role",
                     unescape=True, aslist=True)

            contacts.append(full_record)

    # now assemble the object
    register["metadata"] = [
        {
            "lang" : "en",
            "default" : True,
            "record" : metadata
        }
    ]
    register["software"] = [software]
    register["contact"] = contacts
    # add a default role
    register["organisation"] = [{"details" : organisation, "role" : ["host"]}]
    register["policy"] = policies
    register["api"] = apis

    opendoar["in_opendoar"] = True

    record = {
        "register" : register,
        "admin" : {
            "opendoar" : opendoar
        }
    }

    statistics["third_party"] = "opendoar"
    statistics["type"] = "item_count"

    return record, [statistics]
def detect(self, register, info):
    """Set the register's continent from its country code and log it.

    Reads ``register.country_code``, looks up the continent code with
    ``transformations.cca_to_ctca2`` and the continent name with
    ``transformations.cca_to_ctn``, stores both through
    ``register.set_continent`` and writes an info-level log line.
    ``info`` is accepted but not used here.
    """
    country = register.country_code
    ctca2 = transformations.cca_to_ctca2(country)
    name = transformations.cca_to_ctn(country)
    register.set_continent(name=name, code=ctca2)

    # Built after the register is updated, as in the original statement order.
    message = "Determined continent from country: " + country + " -> " + name
    log.info(message)