def __init__(self, name, email, invalid_matches, amazon, country):
    """Store the given details and resolve the continent of ``country``.

    ``name``, ``email``, ``invalid_matches`` and ``amazon`` are stored on the
    instance unchanged.  ``country`` is only used to derive
    ``self.continent`` through ``transformations.cn_to_ctn``; when that
    lookup raises ``KeyError`` the name is handed to
    ``self.__handle_country_name`` and the lookup is repeated with the value
    that helper returns.
    """
    self.name = name
    self.email = email
    self.invalid_matches = invalid_matches
    self.amazon = amazon

    try:
        continent = transformations.cn_to_ctn(country)
    except KeyError:
        # The lookup did not know this name; retry with the helper's answer.
        fallback_name = self.__handle_country_name(country)
        continent = transformations.cn_to_ctn(fallback_name)
    self.continent = continent
def construct_santa_recipient(cls, raw_string):
    """Build an instance of ``cls`` from one raw text record.

    ``raw_string`` is matched from its start against a pattern with four
    capture groups: the text before ``<`` (name), the text between ``<`` and
    ``>`` (email), a token starting with ``http`` (stored as ``amazon``) and
    the text following the next space (country).

    The continent is looked up with ``transformations.cn_to_ctn``.  If that
    raises ``KeyError`` the country is passed through
    ``obj.__handle_country_name`` and the lookup is retried.  ``obj.country``
    is set on both paths, to the name that was actually resolved, so the
    returned object always carries the same attributes.

    :param raw_string: the text record to parse.
    :returns: the populated instance.
    :raises AttributeError: when ``raw_string`` does not match the pattern
        (``re.match`` returns ``None``).
    """
    obj = cls()
    name, email, amazon, country = re.match(
        r'([^<]*)<([^>]*)>.*(http[^ ]*) ([^<]+)', raw_string).groups()
    obj.name = name.strip()
    obj.email = email
    obj.invalid_matches = []
    obj.amazon = amazon
    try:
        obj.continent = transformations.cn_to_ctn(country)
    except KeyError:
        # The lookup did not know this name; retry with the helper's answer
        # and remember that answer as the country.
        country = obj.__handle_country_name(country)
        obj.continent = transformations.cn_to_ctn(country)
    obj.country = country
    return obj
def construct_santa_recipient(cls, raw_string):
    """Build an instance of ``cls`` from one raw text record.

    ``raw_string`` is matched from its start against a pattern with four
    capture groups: the text before ``<`` (name), the text between ``<`` and
    ``>`` (email), a token starting with ``http`` (stored as ``amazon``) and
    the text following the next space (country).

    The continent is looked up with ``transformations.cn_to_ctn``.  If that
    raises ``KeyError`` the country is passed through
    ``obj.__handle_country_name`` and the lookup is retried.  ``obj.country``
    is set on both paths, to the name that was actually resolved, so the
    returned object always carries the same attributes.

    :param raw_string: the text record to parse.
    :returns: the populated instance.
    :raises AttributeError: when ``raw_string`` does not match the pattern
        (``re.match`` returns ``None``).
    """
    obj = cls()
    name, email, amazon, country = re.match(
        r'([^<]*)<([^>]*)>.*(http[^ ]*) ([^<]+)', raw_string).groups()
    obj.name = name.strip()
    obj.email = email
    obj.invalid_matches = []
    obj.amazon = amazon
    try:
        obj.continent = transformations.cn_to_ctn(country)
    except KeyError:
        # The lookup did not know this name; retry with the helper's answer
        # and remember that answer as the country.
        country = obj.__handle_country_name(country)
        obj.continent = transformations.cn_to_ctn(country)
    obj.country = country
    return obj
def isafrica(location):
    """Return True when ``location`` resolves to the continent 'Africa'.

    The value is normalised with ``str.capitalize`` first.  Any name that
    then starts with ``'Congo'`` is accepted without a lookup; everything
    else is resolved with ``transformations.cn_to_ctn``.

    This is a best-effort predicate: any ordinary error (unknown country,
    non-string input, ...) yields ``False``.  Only ``Exception`` subclasses
    are swallowed, so ``KeyboardInterrupt`` and ``SystemExit`` still
    propagate.
    """
    try:
        # NOTE(review): capitalize() lower-cases everything after the first
        # character ('South Africa' -> 'South africa'); confirm the lookup
        # accepts names in that form.
        location = location.capitalize()
        if location.startswith('Congo'):
            return True
        return transformations.cn_to_ctn(location) == 'Africa'
    except Exception:
        return False
def countries():
    """Fill the ``continent`` and ``country`` tables from ``pycountry``.

    Three passes are made:

    1. collect the continent of every pycountry entry (plus the fixed value
       ``"Unspecified"``),
    2. insert each continent name that is not yet in ``db.continent``,
    3. insert each country whose name is not yet in ``db.country``, together
       with its continent name.

    Countries whose name makes ``transformations.cn_to_ctn`` raise
    ``KeyError`` are reported on stdout and skipped.

    :returns: ``locals()`` -- the local names below are therefore part of the
        returned dictionary and are deliberately left unchanged.
    """
    continents = {"Unspecified"}
    for country in pycountry.countries:
        try:
            continents.add(transformations.cn_to_ctn(country.name))
        except KeyError as e:
            print ('KeyError - reason "%s"' % str(e))
    for x in continents:
        if db(db.continent.continent_name == x).isempty():
            db.continent.insert(continent_name=x)
    for country in pycountry.countries:
        try:
            # Some country names are unknown to the lookup and raise KeyError.
            continent = transformations.cn_to_ctn(country.name)
            # Compare against the stored value (the name), not the pycountry
            # object, so the existence check looks for what the insert wrote.
            if db(db.country.country_name == country.name).isempty():
                db.country.insert(country_name=country.name, continent=continent)
        except KeyError as e:
            print ('IKeyError - reason "%s"' % str(e))
    return locals()
def find_continent(country):
    """Return the continent of ``country``.

    The placeholder ``'Unknown'`` is passed through unchanged; every other
    value is resolved with ``transformations.cn_to_ctn``.
    """
    return 'Unknown' if country == 'Unknown' else transformations.cn_to_ctn(country)
def cn_to_ctn(country):
    """Resolve a prefixed CamelCase country identifier to its continent.

    The first four characters of ``country`` are discarded.  The remainder is
    cut into pieces that each start at a capital letter (anything before the
    first capital is dropped) and the pieces are joined with single spaces.
    That spaced name is looked up with ``transformations.cn_to_ctn``.

    :returns: the continent, or ``"unk"`` when a ``KeyError`` is raised.
    """
    try:
        words = re.findall('[A-Z][^A-Z]*', country[4:])
        spaced_name = ' '.join(words)
        return transformations.cn_to_ctn(spaced_name)
    except KeyError:
        return "unk"
# Minimal usage example: resolve one hard-coded country name with
# transformations.cn_to_ctn and print whatever the call returns.
__author__ = 'Gaurav-PC'

from incf.countryutils import transformations

# The lookup runs at import time; its result is kept in the module-level
# name `result` and written to stdout.
result = transformations.cn_to_ctn('Vietnam')
print(result)
def assignCodeSEC(self,secModifier): print "assigning code to " + secModifier + "....." code_train = pd.read_csv( "C:/Users/niharika.kumar/Desktop/scrubbing tool/sec.modifier datasets/secondary.modif_code.csv", header=0, \ delimiter=",", quoting=2) code_train = code_train.loc[code_train['Modifier'] == secModifier] code = [] rms_code = [] max_country = '' max_state = '' max_longitude='' max_latitude='' max_city='' if not confidence['country'] == []: max_country = max(confidence['country'][0].iteritems(), key=operator.itemgetter(1))[0] if not confidence['state'] == []: max_state = max(confidence['state'][0].iteritems(), key=operator.itemgetter(1))[0] if not confidence['city'] == []: max_city = max(confidence['city'][0].iteritems(), key=operator.itemgetter(1))[0] if not confidence['county'] == []: max_county = max(confidence['county'][0].iteritems(), key=operator.itemgetter(1))[0] if not confidence['latitude'] == [] and not confidence['longitude'] == []: max_latitude = max(confidence['latitude'][0].iteritems(), key=operator.itemgetter(1))[0] max_longitude = max(confidence['longitude'][0].iteritems(), key=operator.itemgetter(1))[0] region = '' columns = ['Text', 'Description', 'Rms', 'Cosine', 'Code', 'Count'] df_ = pd.DataFrame(columns=columns) df_ = df_.fillna(0) i = -1 for text in Scrub.bifercated_data[secModifier]: i = i + 1 t = '' count = 0 df_ = df_[0:0] if text == '': code.append('') rms_code.append('') else: if not max_latitude=='' and not max_longitude=='': t = str(Scrub.sov[max_latitude][i])+','+str(Scrub.sov[max_longitude][i]) elif not max_country == '': t = output_detail['country'][max_country][i] continent = transformations.cn_to_ctn(t) elif not max_state == '': t = str(Scrub.sov[max_state][i]) elif not max_city =='': t = str(Scrub.sov[max_city][i]) elif not max_county =='': t = str(Scrub.sov[max_county][i]) try: x = "https://maps.googleapis.com/maps/api/geocode/json?address=" + t.strip() + "&key=AIzaSyBOR3PXZKEKREeOtZptN3K2Wb3D85PTB18" html_page = 
urllib2.urlopen(x) r = json.load(html_page) if len(r['results']) != 0: formatted_address = r['results'][0]["formatted_address"] if 'USA' in formatted_address or 'Canada' in formatted_address: country = 'USA' elif 'Japan' in formatted_address: country = 'Japan' elif continent == 'Europe': country = 'Europe' else: country = 'Rest' print text if 1: if country == 'USA' or country == 'Canada': region = 'u.s.a ,canada' region_data = code_train.loc[code_train['Region'] == 'u.s.a ,canada'] elif country == 'Japan': region_data = code_train.loc[code_train['Region'] == 'Japan'] region = 'Japan' elif country == 'Europe': region_data = code_train.loc[code_train['Region'] == 'europe'] region = 'europe' else: region_data = code_train.loc[code_train['Region'] == 'Rest'] region = 'Rest' for index, row in region_data.iterrows(): vector1 = self.text_to_vector(str(row['Description'])) vector2 = self.text_to_vector(str(text)) cosine = self.get_cosine(vector1, vector2) if cosine >= 0.3: df_.loc[count] = [text, row['Description'], row['Rms Description'], cosine, row['Code'], 0] count = count + 1 print "finding the most probable code...." # group by description,cosine and code df_frequency = pd.DataFrame( {'Count': df_.groupby(['Description', 'Cosine', 'Code', 'Rms']).size()}).reset_index() max_count = df_frequency['Count'].max() print max_count if not np.isnan(max_count): df_frequency = df_frequency.loc[df_frequency['Count'] == max_count] freq = len(df_frequency.index) if freq >= 1: max_cosine = df_frequency['Cosine'].max() df_frequency = df_frequency.loc[df_frequency['Cosine'] == max_cosine] freq_code = df_frequency['Code'].iloc[0] code.append(freq_code) rms_code.append(df_frequency['Rms'].iloc[0]) del df_frequency else: code.append(0) rms_code.append('UNKNOWN') except Exception,ex: print str(ex)
def cn_to_ctn(country):
    """Resolve a key of ``id_to_name_map`` to a continent.

    ``country`` is first translated through ``id_to_name_map`` and the
    resulting name is looked up with ``transformations.cn_to_ctn``.

    :returns: the continent, or ``"unk"`` when either step raises
        ``KeyError``.
    """
    try:
        country_name = id_to_name_map[country]
        return transformations.cn_to_ctn(country_name)
    except KeyError:
        return "unk"
def get_location(query, format, api_key):
    """Get geographic data of a lab in a coherent way for all labs.

    :param query: coordinates (``format == "reverse"``) or an address
        (``format == "direct"``).  ``None`` or anything shorter than three
        items is treated as empty and no geocoding request is made.
    :param format: ``"reverse"`` or ``"direct"``; any other value skips
        geocoding.
    :param api_key: key passed to the ``OpenCage`` geocoder.
    :returns: a dict with the keys ``city``, ``address_1``, ``postal_code``,
        ``country``, ``county``, ``state``, ``country_code`` (converted to
        three letters), ``latitude``, ``longitude`` and ``continent``.
        Every value that could not be determined is ``None``.
    """
    # Play nice with the API...
    sleep(1)
    geolocator = OpenCage(api_key=api_key, timeout=10)

    # Variables for storing the data
    data = {
        "city": None,
        "address_1": None,
        "postal_code": None,
        "country": None,
        "county": None,
        "state": None,
        "country_code": None,
        "latitude": None,
        "longitude": None,
        "continent": None,
    }

    # Default None values
    location_data = {
        "city": None,
        "road": None,
        "house_number": None,
        "postcode": None,
        "country": None,
        "county": None,
        "state": None,
        "ISO_3166-1_alpha-2": None,
        "country_code": None,
        "lat": None,
        "lng": None,
    }

    if format in ("reverse", "direct") and query is not None and len(query) >= 3:
        if format == "reverse":
            # Reverse geocoding ... from coordinates to address.
            # The first entry of the result is used; an empty result is
            # treated like "nothing found".
            location = geolocator.reverse(query)
            hit = location[0] if location else None
        else:
            # Direct geocoding ... from address to coordinates and full
            # address
            hit = geolocator.geocode(query)
        if hit is not None:
            location_data = hit.raw[u'components']
            location_data["lat"] = hit.raw[u'geometry']["lat"]
            location_data["lng"] = hit.raw[u'geometry']["lng"]

    # Extract the meaningful data
    field_for_component = {
        "town": "city",
        "city": "city",
        "postcode": "postal_code",
        "country": "country",
        "county": "county",
        "state": "state",
        "ISO_3166-1_alpha-2": "country_code",
    }
    for component in location_data:
        if component in field_for_component:
            data[field_for_component[component]] = location_data[component]

    # The address need to be reconstructed.  Missing parts are left out so
    # that an unknown address stays None instead of becoming "None None".
    road = location_data.get("road")
    number = location_data.get("house_number")
    address_parts = [unicode(part) for part in (road, number)
                     if part is not None and part != ""]
    if address_parts:
        data["address_1"] = u" ".join(address_parts)

    data["latitude"] = location_data["lat"]
    data["longitude"] = location_data["lng"]

    # Format the country code to three letters
    try:
        country_data = transformations.cca2_to_ccn(data["country_code"])
        data["country_code"] = transformations.ccn_to_cca3(country_data)
    except Exception:
        data["country_code"] = None

    # Get the continent
    try:
        country_data = transformations.cc_to_cn(data["country_code"])
        data["continent"] = transformations.cn_to_ctn(country_data)
    except Exception:
        data["continent"] = None

    # Return the final data
    return data