def get_country(self, regex):
    """
    Extract the country or special issue name from file name

    regex: A string containing a regular expression for everything that
           is not the country name (and to be excluded) but not the
           file extension

    Returns a dict with three keys:
      'report_name'  - the lower-cased name with spaces and dashes
                       replaced by underscores
      'country_name' - the name returned by countrycode, or
                       "Not resolved"
      'country_code' - the iso3c value returned by countrycode, or
                       "Not resolved"
    """
    # Output doc
    out = {}
    # Drop the last four characters of the file name (the extension).
    fname_wo_extension = self.fname[:-4]
    country = re.sub(regex, '', fname_wo_extension)
    country = re.sub('( |-)', '_', country)
    country = country.lower()
    # Store raw report_name
    out['report_name'] = country
    country = re.sub('_', ' ', country)

    # Resolve country names and store iso3 codes
    try:
        raw_name = countrycode(codes=country, origin='country_name',
                               target='country_name')
        code = countrycode(codes=country, origin='country_name',
                           target='iso3c')
        # Identical name and code results are treated as "no match";
        # in that case look in the typo file.
        if code == raw_name:
            try:
                corrected = [self.typos[country]]
                raw_name = countrycode(codes=corrected,
                                       origin='country_name',
                                       target='country_name')
                code = countrycode(codes=corrected,
                                   origin='country_name',
                                   target='iso3c')
                out['country_name'] = raw_name
                out['country_code'] = code
            except KeyError:
                print("Could not resolve country name for %s" % country)
                out['country_name'] = "Not resolved"
                out['country_code'] = "Not resolved"
        else:
            out['country_name'] = raw_name
            out['country_code'] = code
    except UnicodeEncodeError:
        # Report `country` rather than `raw_name`: the latter is not
        # bound yet when the very first lookup is the one that raised.
        print("Could not resolve country name for %s" % country)
        out['country_name'] = "Not resolved"
        out['country_code'] = "Not resolved"

    # Somehow countrycode returns list in some cases. Check and make string
    if isinstance(out['country_name'], list):
        out['country_name'] = out['country_name'][0]
    if isinstance(out['country_code'], list):
        out['country_code'] = out['country_code'][0]
    return out
def countryname_concordance(data, concord_vars=('countryname', 'iso3c'), rtn_type='series', verbose=False):
    """
    Compute a Country Name Concordance using package: ``pycountrycode``

    Parameters
    ----------
    data            :   list(string)
                        List of Country Names
    concord_vars    :   tuple(string, string), optional(default=('countryname', 'iso3c'))
                        Specify variables to Concord. Only the second
                        element is consulted; it must be 'iso3c' or 'iso3n'.
    rtn_type        :   str, optional(default='series')
                        Return Type (dict or indexed Pandas Series)
    verbose         :   bool, optional(default=False)
                        Currently unused.

    Returns
    -------
    A Pandas Series (sorted by value, indexed by 'countryname') when
    ``rtn_type == 'series'``; otherwise the same data as a dict.

    Raises
    ------
    TypeError   if ``data`` is not a list
    ValueError  if ``concord_vars[1]`` is not a supported target

    Notes
    ------
    ``pycountrycode`` is going through a re-write so this will most likely break

    ..  Future Work
        -----------
        [1] Build Internal CountryCode Routines
    """
    if not isinstance(data, list):
        raise TypeError("data: needs to be a list")
    target = concord_vars[1]
    # Fail early: previously an unsupported target fell through both
    # branches and died on an unbound local at the return statement.
    if target not in ('iso3c', 'iso3n'):
        raise ValueError("concord_vars[1]: needs to be 'iso3c' or 'iso3n'")
    #-Find Set Of Countries-#
    countrynames = list(set(data))
    codes = _cc.countrycode(codes=countrynames, origin='country_name', target=target)
    # Anything that is not a string of exactly three characters is
    # replaced by the '.' placeholder.
    # NOTE(review): if countrycode yields non-string values for 'iso3n',
    # every entry becomes '.' -- confirm against the library.
    codes = [code if isinstance(code, str) and len(code) == 3 else '.'
             for code in codes]
    concord = _pd.Series(codes, index=countrynames, name=target)
    concord.index.name = 'countryname'
    # Series.sort() is gone from current pandas; sort_values() is the
    # value-sorting replacement.
    concord = concord.sort_values()
    #-Parse rtn_type-#
    if rtn_type == 'series':
        return concord
    return concord.to_dict()
def get_country(self, regex):
    """
    Extract the country or special issue name from file name

    regex: A string containing a regular expression for everything that
           is not the country name (and to be excluded) but not the
           file extension

    The returned dict holds 'report_name' (lower-cased, underscores for
    spaces and dashes), 'country_name' and 'country_code' (iso3c). The
    latter two are "Not resolved" when the lookup fails.
    """
    not_resolved = "Not resolved"
    # Output doc
    out = {}
    # The last four characters of the file name are taken to be the
    # extension and removed.
    fname_wo_extension = self.fname[:-4]
    country = re.sub(regex, '', fname_wo_extension)
    country = re.sub('( |-)', '_', country)
    country = country.lower()
    # Store raw report_name
    out['report_name'] = country
    country = re.sub('_', ' ', country)

    # Resolve country names and store iso3 codes
    try:
        raw_name = countrycode(codes=country, origin='country_name',
                               target='country_name')
        code = countrycode(codes=country, origin='country_name',
                           target='iso3c')
        if code != raw_name:
            out['country_name'] = raw_name
            out['country_code'] = code
        else:
            # If no name match look in the typo file
            try:
                typo_fix = [self.typos[country]]
                raw_name = countrycode(codes=typo_fix,
                                       origin='country_name',
                                       target='country_name')
                code = countrycode(codes=typo_fix,
                                   origin='country_name',
                                   target='iso3c')
                out['country_name'] = raw_name
                out['country_code'] = code
            except KeyError:
                print("Could not resolve country name for %s" % country)
                out['country_name'] = not_resolved
                out['country_code'] = not_resolved
    except UnicodeEncodeError:
        # `raw_name` may not exist yet at this point (the first lookup
        # can be the call that raised), so the message uses `country`.
        print("Could not resolve country name for %s" % country)
        out['country_name'] = not_resolved
        out['country_code'] = not_resolved

    # Somehow countrycode returns list in some cases. Check and make string
    for field in ('country_name', 'country_code'):
        if isinstance(out[field], list):
            out[field] = out[field][0]
    return out
def get_global_stats():
    """
    Aggregate per-country counts over the first ten users and dump them.

    For each user, the 'ath_c' mapping from the third element returned
    by ``analyze_user_books`` is summed per key. The keys are then
    converted from country names to iso3c codes.

    Side effects: prints a running user counter and writes the stats as
    JSON to ``~/good_reads/file.json``.

    Returns a list of ``[iso3c_code, count]`` pairs.
    """
    # Local import: needed to expand '~' below.
    import os

    gg_stats = {'users': [], 'countries': {}}
    users = User.query.all()
    t_dict = {}
    count = 0
    for user in users:
        # Only the first ten users are processed.
        if count == 10:
            break
        count += 1
        _, _, info, _, _ = analyze_user_books(user)
        print(count)
        gg_stats['users'].append(user.id)
        for cc_key in info['ath_c']:
            if cc_key in t_dict:
                t_dict[cc_key] += info['ath_c'][cc_key]
            else:
                t_dict[cc_key] = info['ath_c'][cc_key]

    # Materialise the keys: a dict view cannot be indexed on Python 3.
    cc_full_name = list(t_dict)
    c_codes = countrycode(codes=cc_full_name, origin='country_name', target='iso3c')
    cc_list = [[code, t_dict[name]] for name, code in zip(cc_full_name, c_codes)]
    gg_stats['countries'] = cc_list

    # open() does not expand '~' itself; without expanduser the path
    # refers to a literal directory named '~'.
    with open(os.path.expanduser('~/good_reads/file.json'), 'w') as f:
        json.dump(gg_stats, f)
    return cc_list
def get_countries():
    """
    Return iso2c codes for the countries listed in "itunes_feed_gen.html".

    The names are the option texts of the select elements inside the
    first div with class "app-controls".
    """
    # generator_url is now loaded dynamically. This is a quick fix...
    page = lxml.html.parse("itunes_feed_gen.html")
    controls = page.xpath('.//div[@class="app-controls"]')[0]
    names = []
    for option in controls.xpath(".//select/option"):
        names.append(option.text.encode('utf-8'))
    # Convert country long name to iso2c
    return countrycode(codes=names, origin='country_name', target='iso2c')
def get_country_lookup(df):
    """
    Build a dict from each distinct value of ``df.country`` to a country name.

    The values are looked up with origin "fips104". A value whose lookup
    result is None maps to itself.
    """
    distinct_codes = pd.unique(df.country)
    names = countrycode(codes=list(distinct_codes), origin="fips104", target='country_name')
    return {
        code: (code if name is None else name)
        for code, name in zip(distinct_codes, names)
    }
def cname(data, details=False):
    """
    Standardize country names

    Adds a lower-cased 'nameClean' entry to every record in ``data``
    and returns ``data``. With ``details`` set, each original name is
    printed next to its standardized form.
    """
    names = dictPull(data, 'name')
    # Rewrite the two Congo entries before the lookup.
    names[closeMatchI('Congo Democratic of', names)] = 'Congo, Democratic'
    names[closeMatchI('Congo Republic of', names)] = 'Congo, Republic'
    standardized = countrycode(names, 'country_name', 'country_name')
    for position, clean_name in enumerate(standardized):
        record = data[position]
        record['nameClean'] = clean_name.lower()
        if details:
            print(record['name'] + ' ---> ' + clean_name + '\n')
    return data
def wifi_change_region(self, country):
    """
    Set the wifi regulatory region and confirm it from ``iw reg get``.

    ``country`` is a country name; it is converted to an iso2c code,
    which is sent with ``iw reg set``. Returns the matched code when it
    appears in the output of ``iw reg get``, otherwise None.
    """
    region = countrycode(country, origin='country_name', target='iso2c')
    self.sudo_sendline("iw reg set %s" % (region))
    self.expect(self.prompt)
    self.sendline("iw reg get")
    self.expect(self.prompt)
    found = re.search(region, self.before)
    return found.group(0) if found else None
def cname(data, details=False):
    """
    Standardize country names

    Each record in ``data`` gains a 'nameClean' key holding the
    lower-cased standardized name; ``data`` itself is returned. When
    ``details`` is true the mapping of every name is printed.
    """
    raw_names = dictPull(data, 'name')
    # The two Congo spellings are replaced ahead of the lookup.
    raw_names[closeMatchI('Congo Democratic of', raw_names)] = 'Congo, Democratic'
    raw_names[closeMatchI('Congo Republic of', raw_names)] = 'Congo, Republic'
    clean_names = countrycode(raw_names, 'country_name', 'country_name')
    for idx, cleaned in enumerate(clean_names):
        data[idx]['nameClean'] = cleaned.lower()
        if not details:
            continue
        print(data[idx]['name'] + ' ---> ' + cleaned + '\n')
    return data
def continent_by_country_name(event):
    """
    Get continent name (e.g. "Europe"), given a country name (e.g. "Italy")

    ``event`` must carry the country name under the 'country' key.
    Returns ``{"continent": ...}`` on success, or the result of
    ``_error`` when the country is missing or the lookup is empty.
    """
    country_name = event.get('country')
    if country_name:
        continent = countrycode(codes=[country_name], origin="country_name", target='continent')
        if continent:
            # Only the first element of the lookup result is reported.
            return {"continent": next(iter(continent))}
        return _error("Invalid country: %s" % country_name)
    return _error("Invalid event (required country)")
def get_country(self,row,path="#country",return_default = True):
    """
    Return a lower-cased country identifier for ``row``.

    Lookup order:
      1. ``row[path + "+code"]`` when that column is present;
      2. the iso2c code for the name in ``row[path]``, memoised in
         ``self.country_cache``;
      3. ``"123" + self.default_country`` when ``return_default`` is
         true, otherwise ``"unknown"``.
    """
    code_column = path + "+code"
    if code_column in row.keys():
        return row[code_column].lower()

    if path in row.keys():
        name = row.get(path, "xx")
        if name in self.country_cache.keys():
            country = self.country_cache[name]
        else:
            country = countrycode(codes=[name], origin='country_name', target="iso2c")[0]
            self.country_cache[name] = country
    elif return_default:
        country = "123" + self.default_country
    else:
        country = "unknown"
    return country.lower()
def extInfo(raw,label):
    """
    Extract case details from the first element of ``raw``.

    Returns a dict with the keys 'plaintiff', 'pClean', 'claimant',
    'year', 'month', 'type' and 'status'. The text pieces are cut out
    of ``raw[0]`` with ``cleanStrSoup`` and ``rPunct``; 'status' is set
    to ``label``.
    """
    store = dict.fromkeys(['plaintiff', 'pClean', 'claimant', 'year',
                           'month', 'type', 'status'])
    store['claimant'] = rPunct(cleanStrSoup(raw[0], 'colspan="3">', ' v. '))
    store['plaintiff'] = rPunct(cleanStrSoup(raw[0], ' v. ', ' (ICSID'))

    # Equality, not `in`: `x in 'literal'` is a substring test, so names
    # such as 'Republic of the Congo' (or an empty string) were wrongly
    # treated as the Democratic Republic of the Congo.
    if store['plaintiff'] == 'Democratic Republic of the Congo':
        store['pClean'] = 'CONGO, THE DEMOCRATIC REPUBLIC OF'
    else:
        store['pClean'] = countrycode(
            store['plaintiff'], 'country_name', 'country_name').upper()

    # Split the text after 'No. ' on '/' into type, year and month.
    yr = cleanStrSoup(raw[0], 'No. ', ')</td>').split('/')
    if store['claimant'] != 'Oded Besserglik':
        store['type'] = yr[0]
        # Two-digit years above 20 are read as 19xx, the rest as 20xx.
        if len(yr[1]) == 2 and int(yr[1]) > 20:
            year = '19' + yr[1]
        else:
            year = '20' + yr[1]
        store['year'] = year
        store['month'] = yr[2]
    else:
        # Hard-coded values for this one claimant.
        # NOTE(review): presumably its case number does not split
        # cleanly -- confirm against the source page.
        store['type'] = 'ARB(AF)'
        store['year'] = '2014'
        store['month'] = '2'
    store['status'] = label
    return store
def _on_selection(self, index):
    """Show the countrycode results for ``self.codes[index]`` in the two labels."""
    iso2 = self.codes[index]
    labels = self.ui
    # First label: lookup with the default target.
    labels.lblCountry.setText(countrycode(iso2, origin="iso2c"))
    # Second label: the iso3c code.
    labels.lblCode3.setText(countrycode(iso2, origin="iso2c", target="iso3c"))
def get_name_from_code(code):
    """Return the country name for an iso3c ``code``."""
    return countrycode(codes=[code], origin='iso3c', target='country_name')[0]
def get_code_from_name(name):
    """Return the iso3c code for a country ``name``."""
    return countrycode(codes=[name], origin='country_name', target='iso3c')[0]
def Export_TIMES(df=None, use_scaled_capacity=False):
    """
    Build the TIMES 'STOCK' table from power plant data and write it to Excel.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Plant data. The columns read here are 'Country', 'Technology',
        'Set', 'Fueltype', 'YearCommissioned' and the capacity column.
        When omitted, the result of
        ``Carma_ENTSOE_ESE_GEO_OPSD_WEPP_WRI_matched_reduced()`` is used.
        The input is copied, not modified.
    use_scaled_capacity : bool, optional
        Use the 'Scaled Capacity' column instead of 'Capacity'.

    Returns
    -------
    pandas.DataFrame
        One row per TIMES type and year (2010 to 2050 in steps of five),
        one column per country. Also written to
        ``data/out/Export_Stock_TIMES.xlsx`` next to this module.

    Raises
    ------
    RuntimeError
        If no data is passed and the default data set is not available.
    ValueError
        If a row has no country or no TIMES type.
    """
    if df is None:
        df = Carma_ENTSOE_ESE_GEO_OPSD_WEPP_WRI_matched_reduced()
        if df is None:
            raise RuntimeError("The data to be exported does not yet exist.")
    df = df.copy()

    # replace country names by iso3166-2 codes
    df.loc[:, 'Country'] = countrycode(codes=df.Country, origin='country_name', target='iso2c')

    # add column with TIMES-specific type. The pattern is as follows:
    # 'ConELC-' + Set + '_' + Fueltype + '-' Technology
    # Assign the filled column back: an in-place fillna on a .loc
    # selection may act on a temporary copy and leave df untouched.
    df['Technology'] = df['Technology'].fillna('')

    def _tech_has(pattern):
        # Case-insensitive match against the 'Technology' column.
        return df['Technology'].str.contains(pattern, case=False)

    # Built on df's own index so the string concatenation lines up row
    # by row even when df does not carry a default 0..n-1 index.
    set_abbrev = pd.Series(
        np.where(df.loc[:, 'Set'].str.contains('CHP'), 'CHP', 'PP'),
        index=df.index)
    times_type = ('ConELC-' + set_abbrev + '_'
                  + df.loc[:, 'Fueltype'].map(fueltype_to_abbrev()))
    df.insert(10, 'TimesType', times_type)

    is_wind = df['Fueltype'] == 'Wind'
    is_gas = df['Fueltype'] == 'Natural Gas'
    is_hydro = df['Fueltype'] == 'Hydro'
    offshore = _tech_has('offshore')
    ccgt = _tech_has('CCGT')
    ocgt = _tech_has('OCGT')
    pumped = _tech_has('pumped storage')
    run_of_river = _tech_has('run-of-river')

    # `== False` is kept (instead of `~`) so that a missing match result
    # does not count as "not matching".
    df.loc[is_wind & offshore, 'TimesType'] += 'F'
    df.loc[is_wind & (offshore == False), 'TimesType'] += 'N'
    df.loc[is_gas & ccgt, 'TimesType'] += '-CCGT'
    df.loc[is_gas & (ccgt == False) & ocgt, 'TimesType'] += '-OCGT'
    df.loc[is_gas & (ccgt == False) & (ocgt == False), 'TimesType'] += '-ST'
    df.loc[is_hydro & pumped, 'TimesType'] += '-PST'
    df.loc[is_hydro & run_of_river & (pumped == False), 'TimesType'] += '-ROR'
    df.loc[is_hydro & (run_of_river == False) & (pumped == False), 'TimesType'] += '-STO'

    # add column with technical lifetime
    df.insert(12, 'Life', df.TimesType.map(timestype_to_life()))

    # add column with decommissioning year
    df.insert(13, 'YearDecommissioned',
              df.loc[:, 'YearCommissioned'] + df.loc[:, 'Life'])

    # Now create new export dataframe with headers
    # Check for missing values before sorting: sorted() cannot order a
    # mix of missing values and strings on Python 3.
    if df['Country'].isnull().any():
        raise ValueError("There are rows without a valid country identifier "
                         "in the dataframe. Please check!")
    countries = sorted(set(df.Country))
    columns = ['Attribute', '*Unit', 'LimType', 'Year']
    columns.extend(countries)
    columns.append('Pset_Pn')
    df_exp = pd.DataFrame(columns=columns)

    # Loop stepwise through technologies, years and countries
    row = 0
    if df['TimesType'].isnull().any():
        raise ValueError("There are rows without a valid TIMES-Type identifier "
                         "in the dataframe. Please check!")
    timestypes = sorted(set(df.TimesType))
    data_timestypes = df.groupby(df.TimesType)
    cap_column = 'Scaled Capacity' if use_scaled_capacity else 'Capacity'
    for tt in timestypes:
        tt_group = data_timestypes.get_group(tt)
        # The per-country grouping does not depend on the year.
        data_countries = tt_group.groupby(tt_group.Country)
        for yr in range(2010, 2055, 5):
            df_exp.loc[row, 'Year'] = yr
            for ct in countries:
                if ct in data_countries.groups:
                    ct_group = data_countries.get_group(ct)
                    # Here, the matched elements are being filtered:
                    # a plant counts only while it is in operation.
                    series = ct_group.apply(
                        lambda x: x[cap_column]
                        if x['YearCommissioned'] <= yr <= x['YearDecommissioned']
                        else 0,
                        axis=1)
                    # Divide the sum by 1000 (MW->GW) and write into the export dataframe
                    df_exp.loc[row, ct] = series.sum() / 1000
                else:
                    df_exp.loc[row, ct] = 0
            df_exp.loc[row, 'Pset_Pn'] = tt
            row = row + 1
    df_exp.loc[:, 'Attribute'] = 'STOCK'
    df_exp.loc[:, '*Unit'] = 'GW'
    df_exp.loc[:, 'LimType'] = 'FX'

    # Write resulting dataframe to file
    outfn = os.path.join(os.path.dirname(__file__), 'data', 'out', 'Export_Stock_TIMES.xlsx')
    df_exp.to_excel(outfn)
    return df_exp
def test_cown_iso3c():
    """A list of 'cown' codes converts to the matching 'iso3c' codes."""
    expected = ['ISR', 'BHS']
    converted = countrycode(codes=['666', '31'], origin='cown', target='iso3c')
    assert converted == expected
new_data.append(line_format) if not quiet: print('') match_country = re.compile(r'\(([a-z\s]+)\)$', re.I) data_to_append = [] if not quiet: print(u'\x1b[1m--- Adding new networks ---\x1b[0m'.center(127)) print(u'\x1b[1m\x1b[4;30;47m' + u'Action'.center(17) + u'Network Name'.center(35) + u' - ' + u'Country'.center(35) + u' - ' + u'Guessed Time Zone'.center(35) + u'\x1b[0m') for key, value in new_list.items(): # try to determine time zone by country name in display name tz_guess = '' country = re.findall(match_country, value) if country: code = countrycode(codes=country, origin='country_name', target='iso2c')[0] if len(code) > 2: code = override_map.get(code, code) if len(code) <= 2: tz_guess = country_timezones(code)[0] if tz_guess: auto_new_count += 1 if not quiet: print(u'\x1b[1m\x1b[0;30;46m{0: ^16}\x1b[0m {1: ^35} - {2: ^35} - {3: ^35}'.format( 'New network:', key, country[0], tz_guess)) new_data.append(u'{name}:{time_zone}\n'.format(name=key, time_zone=tz_guess)) else: new_count += 1 if not quiet: print(u'\x1b[1m\x1b[0;30;43m{0: ^16} {1: ^35} - {2: ^35} - {3: ^35}\x1b[0m'.format(
def test_default():
    """Calling countrycode without arguments yields the default result."""
    expected = ['Algeria', 'Canada']
    result = countrycode()
    assert result == expected
def test_unicode():
    """A single iso3c code converts to its country name."""
    result = countrycode('DZA', 'iso3c', 'country_name')
    assert result == 'Algeria'
def test_cn_iso3c():
    """A list of country names converts to the matching iso3c codes."""
    names = ['United States', 'India', 'Canada', 'Dem. Repu. Congo']
    expected = ['USA', 'IND', 'CAN', 'COD']
    assert countrycode(names, 'country_name', 'iso3c') == expected
country_hash = {} print('Counting repeat data') cnt = collections.Counter(timezones) #print cnt for key in cnt.keys(): r = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address=='+ str(key) + '&sensor=true&key=AIzaSyC61vF0pmAoVlia85hIhtUBx2G2hZkF1us') response = json.loads(r.content) results = response['results'] if(len(results) != 0): results = response['results'][0]['formatted_address'] results = results.split() list_countries = [] country = results[len(results) - 1] list_countries.append(country) c_code = countrycode(codes = list_countries, origin ='country_name', target = 'iso3c') if c_code[0] in country_hash.keys(): country_hash[c_code[0]]['numberOfThings'] = country_hash[c_code[0]]['numberOfThings'] + cnt[key] else: country_hash[c_code[0]] = {} country_hash[c_code[0]]['numberOfThings'] = cnt[key] max_things = 0 for key in country_hash.keys(): if country_hash[key]['numberOfThings'] > max_things: max_things = country_hash[key]['numberOfThings'] for key in country_hash.keys(): scale = country_hash[key]['numberOfThings']/max_things if(scale > 0.75): country_hash[key]['fillKey'] = 'HIGH'
def test_cn_iso3c():
    """Country names given as a list come back as a list of iso3c codes."""
    country_names = ['United States', 'India', 'Canada', 'Dem. Repu. Congo']
    codes = countrycode(country_names, 'country_name', 'iso3c')
    assert codes == ['USA', 'IND', 'CAN', 'COD']
def test_iso3c_cn_single():
    """A single (non-list) iso3c code converts to a single country name."""
    name = countrycode('DZA', 'iso3c', 'country_name')
    assert name == 'Algeria'
def test_regex():
    """Name variants resolve to the expected iso3c code."""
    # (input name, origin, expected iso3c), checked in this order.
    cases = [
        ('Cape Verde', 'regex', 'CPV'),
        ('Cabo Verde', 'regex', 'CPV'),
        ("Cote d'Ivoire", 'regex', 'CIV'),
        ("Côte d'Ivoire", 'regex', 'CIV'),
        ('georgia', 'country_name', 'GEO'),
        ('south georgia', 'country_name', 'SGS'),
        ('serbia', 'country_name', 'SRB'),
        ('serbia and montenegro', 'regex', 'SRB'),
        ('st. kitts and nevis', 'country_name', 'KNA'),
        ('st. christopher and nevis', 'country_name', 'KNA'),
        ('st. maarten', 'country_name', 'SXM'),
        ('sint maarten', 'country_name', 'SXM'),
        ('saint maarten', 'country_name', 'SXM'),
        ('guinea', 'country_name', 'GIN'),
        ('guinea bissau', 'country_name', 'GNB'),
        ('equatorial guinea', 'country_name', 'GNQ'),
        ('niger', 'country_name', 'NER'),
        ('nigeria', 'country_name', 'NGA'),
        ('west bank', 'country_name', 'PSE'),
        ('south korea', 'country_name', 'KOR'),
        ('korea', 'country_name', 'KOR'),
        ('korea, dem. rep.', 'country_name', 'PRK'),
        ('democ. republic of congo', 'country_name', 'COD'),
        ('republic of congo', 'country_name', 'COG'),
    ]
    for name, origin, expected in cases:
        assert countrycode(name, origin, 'iso3c') == expected
def test_unicode():
    """A unicode iso3c code converts to its country name."""
    result = countrycode(u'DZA', 'iso3c', 'country_name')
    assert result == 'Algeria'
def test_regex():
    """Country name variants resolve to the expected iso3c code."""
    # (input name, expected iso3c), checked in this order.
    expectations = [
        ('georgia', 'GEO'),
        ('south georgia', 'SGS'),
        ('serbia', 'SRB'),
        ('serbia and montenegro', 'SRB'),
        ('st. kitts and nevis', 'KNA'),
        ('st. christopher and nevis', 'KNA'),
        ('st. maarten', 'SXM'),
        ('sint maarten', 'SXM'),
        ('saint maarten', 'SXM'),
        ('guinea', 'GIN'),
        ('guinea bissau', 'GNB'),
        ('equatorial guinea', 'GNQ'),
        ('niger', 'NER'),
        ('nigeria', 'NGA'),
        ('west bank', 'PSE'),
        ('south korea', 'KOR'),
        ('korea', 'KOR'),
        ('korea, dem. rep.', 'PRK'),
        ('democ. republic of congo', 'COD'),
        ('republic of congo', 'COG'),
    ]
    for name, iso3c in expectations:
        assert countrycode(name, 'country_name', 'iso3c') == iso3c