def create_country_finder():
    """Build a LocationFinder whose regex matches every country name.

    Country names come from geonamescache; unidecode strips accents so the
    compiled regex matches ASCII-normalised text.

    Returns:
        LocationFinder wrapping the compiled country-name regex.
    """
    # A comprehension already produces a list; the original wrapped it in a
    # redundant list(...) call.
    names = [
        unidecode.unidecode(record["name"])
        for record in geonamescache.GeonamesCache().get_countries().values()
    ]
    return LocationFinder(_compile_regex(names))
def cityDic():
    """Build a lookup of large AT/DE/GB/FR cities keyed by lower-case name.

    Each qualifying city (population > 150000) is indexed under its English
    name and, when different, its Italian translation.

    Returns:
        (citiyDic, cityList): name -> geonamescache city record, and the
        list of accepted lower-case names.
    """
    city = geonamescache.GeonamesCache().get_cities()
    citiyDic = {}
    cityList = []
    n = 0
    # Hoisted loop invariant: the translator is configured once for a fixed
    # target language; the original rebuilt it on every loop iteration.
    #it_city_name = translator.translate(city_name, dest='it').text
    translator = Translator(to_lang="it")
    for key in city:
        if city[key]['countrycode'] in ['AT', 'DE', 'GB', 'FR'] and city[key]['population'] > 150000:
            city_name = city[key]['name']
            cityList.append(city_name.lower())
            citiyDic[city_name.lower()] = city[key]
            it_city_name = translator.translate(city_name)
            # Index the Italian spelling too when it differs.
            if it_city_name != city_name:
                cityList.append(it_city_name.lower())
                citiyDic[it_city_name.lower()] = city[key]
            n += 1
    # Hand-curated exclusions: names that collide with common words.
    cityFilter = ['livorno', ]  # 'nice']
    cityList = list(filter(lambda a: a not in cityFilter, cityList))
    cityList.append('barcellona')
    print(n)
    # The original printed len(cityList) twice in a row; once is enough.
    print(len(cityList))
    return citiyDic, cityList
def __init__(self):
    """Load per-country metadata from geonamescache and download the raw
    CSV dataframes listed in data_urls.

    NOTE(review): relies on module-level names `continent_codes`,
    `data_urls`, `base_url`, `requests`, `pd`, `StringIO` — confirm they
    are defined/imported at file scope.
    """
    self.dataframes = {}
    gc = geonamescache.GeonamesCache()
    gc_data = gc.get_countries()
    self.country_metadata = {}
    # Map geonamescache spellings onto the names used by the CSV source.
    normalized_names = {
        "Timor Leste": "East Timor",
        "Vatican": "Vatican City",
        "Democratic Republic of the Congo": "Congo (Kinshasa)",
        "Republic of the Congo": "Congo (Brazzaville)",
        "Cabo Verde": "Cape Verde"
    }
    for country_code in gc_data:
        metadata = gc_data[country_code]
        name = metadata["name"]
        if name in normalized_names:
            name = normalized_names[name]
        population = metadata["population"]
        area = metadata["areakm2"]
        continent = continent_codes[metadata["continentcode"]]
        self.country_metadata[name] = {
            "population": population,
            "area": area,
            "continent": continent
        }
    # Download one CSV per metric and parse it into a dataframe.
    for metric in data_urls.keys():
        url = base_url + data_urls[metric]  # Combine URL parts
        r = requests.get(url)  # Retrieve from URL
        self.dataframes[metric] = pd.read_csv(StringIO(r.text), sep=",")  # Convert into Pandas dataframe
def cityDic():
    """Build a lookup of Italian cities (population > 20000) keyed by
    lower-case name, including roman-alphabet alternate names.

    Returns:
        (citiyDic, cityList): name -> geonamescache city record, and a
        de-duplicated list of accepted lower-case names.
    """
    city = geonamescache.GeonamesCache().get_cities()
    citiyDic = {}
    cityList = []
    n = 0
    for key in city:
        if city[key]['countrycode'] == 'IT' and city[key]['population'] > 20000:
            # BUG FIX: the original tested len(alternatenames[0]), which
            # raises IndexError when 'alternatenames' is an empty list;
            # test the list itself instead.
            if city[key]['alternatenames']:
                # Keep the primary name plus sane roman-alphabet alternates.
                validCityNames = [city[key]['name'].lower()] + [
                    name.lower() for name in city[key]['alternatenames']
                    if only_roman_chars(name) and notAllUpper(name) and len(name) > 3
                ]
                cityList += validCityNames
                for name in validCityNames:
                    citiyDic[name] = city[key]
            else:
                cityList += [city[key]['name'].lower()]
                citiyDic[city[key]['name'].lower()] = city[key]
            n += 1
    # Exclude alternate names that collide with common Italian words.
    cityFilter = ['regio', 'marino', 'come', 'bra', 'ramma']
    cityList = list(filter(lambda a: a not in cityFilter, cityList))
    cityList = list(set(cityList))
    print(len(cityList))
    return citiyDic, cityList
def is_city_in_country(self, city_name, country):
    """Return True if any city named `city_name` lies in `country`.

    `country` is compared against the pycountry name resolved from the
    geonamescache country code of each matching city.
    """
    gc = geonamescache.GeonamesCache()
    cities = gc.get_cities_by_name(city_name)
    for city in cities:
        # BUG FIX: dict.iteritems() is Python 2 only; use items().
        for k, v in city.items():
            # NOTE(review): modern pycountry spells the kwarg alpha_2, not
            # alpha2 — confirm against the installed pycountry version.
            match = pycountry.countries.get(alpha2=v.get('countrycode'))
            # Guard against unknown country codes (get() may return None).
            if match is not None and country == match.name:
                return True
    return False
def __init__(self):
    """Pre-compute (lower-cased alias set, record) pairs for every city."""
    self.cities = []
    for record in geonamescache.GeonamesCache().get_cities().values():
        # Collect the primary name plus all alternates, lower-cased, so
        # later lookups can be case-insensitive.
        aliases = {alias.lower() for alias in record['alternatenames']}
        aliases.add(record['name'].lower())
        self.cities.append((aliases, record))
def process_item(self, item, spider):
    """Scrapy pipeline step: extract country/city mentions from
    item['main_text'] with spaCy NER and append LocationItem entries to
    item['reports'][0]['locations'].

    NOTE(review): loading the spaCy model here runs once per item, which
    is expensive — consider caching it on the pipeline instance.
    """
    gc = geonamescache.GeonamesCache()
    # Build a lookup from every country spelling (name, ISO2, dotted ISO2,
    # ISO3) to its geonamescache key.
    countries = gc.get_countries()
    country_list = {}
    for country in countries:
        for k, v in countries[country].items():
            if k == "name":
                country_list[v] = country
            elif k == "iso":
                country_list[v] = country
                # Dotted form, e.g. "U.S" for "US".  The original built this
                # with a char-by-char slice generator; join on the string
                # directly is equivalent.
                country_list[".".join(v)] = country
            elif k == "iso3":
                country_list[v] = country
    # City name membership set (O(1) lookups) plus name->countrycode pairs.
    city_names = set()
    city_country = []
    for elem in gc.get_cities().values():
        city_names.add(elem['name'])
        city_country.append({elem['name']: elem['countrycode']})
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(item['main_text'])
    country = []
    location = []
    count_country = {}
    result = []
    for ent in doc.ents:
        if ent.label_ == 'GPE':
            find_country = country_list.get(ent.text)
            # Record each distinct country only once.
            if find_country is not None and count_country.get(find_country) is None:
                count_country[find_country] = count_country.get(find_country, 0) + 1
                country.append(ent.text)
            elif ent.text in city_names:
                for dict_city_country in city_country:
                    if ent.text in dict_city_country:
                        location.append(dict_city_country)
    # Pair every mentioned country with the cities found in it; fall back
    # to an empty location when no city matched.
    for country_name in country:
        iso = country_list[country_name]
        for location_dict in location:
            if iso in location_dict.values():
                locationItem = LocationItem()
                locationItem["country"] = country_name
                locationItem["location"] = next(iter(location_dict))
                result.append(locationItem)
        if not any(elem["country"] == country_name for elem in result):
            result.append({"country": country_name, "location": ""})
    # (Removed an unused `i = 0` counter from the original.)
    for e in result:
        item['reports'][0]['locations'].append(e)
    return item
async def city_answer(ctx, answer):
    """Record `answer` on the channel's current game if it is a real city;
    otherwise tell the author the city name was wrong."""
    gc = geo.GeonamesCache()
    matches = gc.get_cities_by_name(answer)
    if not matches:
        await ctx.send(
            f"**{ctx.author.mention}: you gave the wrong city name**")
    else:
        current_games[ctx.channel].answer = answer
def __init__(self, mag_affilations, max_worker=2):
    """Cache city and country name sets from geonamescache for
    affiliation matching, plus an iso->name country mapper."""
    self.aff = mag_affilations
    self.gc = geonamescache.GeonamesCache()
    # Only the names are needed for matching, so keep them as sets.
    self.cities = {record['name'] for record in self.gc.get_cities().values()}
    self.countries = {record['name'] for record in self.gc.get_countries().values()}
    self._max_workers = max_worker
    self.mapper = country(from_key='iso', to_key='name')
def getLocation(userCommand):
    """Return the first country or city name found in `userCommand`,
    or '' when neither matches."""
    # Countries: substring match on the lower-cased or Capitalized name.
    for country in pycountry.countries:
        name = country.name
        if (name.lower() in userCommand) or (name.capitalize() in userCommand):
            return name
    # Cities: whole-word match against the tokenised command.
    tokens = userCommand.split()
    for record in geonamescache.GeonamesCache().get_cities().values():
        name = record['name']
        if (name.lower() in tokens) or (name.capitalize() in tokens):
            return name.capitalize()
    return ""
def is_valid_us_county_fip(county_fip: str) -> bool:
    """Return True iff `county_fip` is a known US county FIPS code.

    BUG FIX: the original called len() before checking the type, so a
    non-string, non-sized argument (e.g. an int) raised TypeError instead
    of returning False.  Validate the type first.
    """
    if not isinstance(county_fip, str):
        return False
    # County FIPS codes are at most 5 characters.
    if len(county_fip) > 5:
        return False
    geocache = geonamescache.GeonamesCache()
    # Set membership is O(1) versus O(n) against the original list.
    county_fips = {d["fips"] for d in geocache.get_us_counties()}
    return county_fip in county_fips
def get_population(Country_ISO):
    """Return the population for an ISO country code.

    Falls back to 8e7 when the code is unknown, so a wrong ISO code does
    not crash the caller.
    """
    gc = geonamescache.GeonamesCache()
    country = gc.get_countries().get(Country_ISO)
    # `is None` is the idiomatic None test ('== None' goes through __eq__).
    if country is None:
        N = 8 * 10 ** 7  # arbitrary default population for unknown codes
    else:
        N = country.get('population')
    return N
def is_valid_city_name(city: str) -> bool:
    """Return True iff `city` exactly matches a geonamescache city name.

    BUG FIX: the original called len() before checking the type, so a
    non-string, non-sized argument raised TypeError instead of returning
    False.  Validate the type first.
    """
    if not isinstance(city, str):
        return False
    # 54 is the original hard bound — presumably the longest city name in
    # the dataset; TODO confirm.
    if len(city) > 54:
        return False
    geocache = geonamescache.GeonamesCache()
    # Set membership is O(1) versus O(n) against the original list.
    city_names = {record["name"] for record in geocache.get_cities().values()}
    return city in city_names
def make_geographies_list():
    """Make a flat list of cities, counties and states that we can exclude
    from short names.
    """
    geonames = geonamescache.GeonamesCache()
    # Large US cities only (population above 150k).
    big_us_cities = []
    for record in geonames.get_cities().values():
        if record[u"countrycode"] == u"US" and record[u"population"] > 150000:
            big_us_cities.append(record[u"name"])
    county_names = [county[u"name"] for county in geonames.get_us_counties()]
    state_names = [state[u"name"] for state in geonames.get_us_states().values()]
    return big_us_cities + county_names + state_names
def create_cities_dictionary(self, population_threshold: int = 78000):
    """Group CityEntry objects by city name for every city whose
    population is at least `population_threshold`."""
    gc = geonamescache.GeonamesCache()
    for record in gc.get_cities().values():
        if record['population'] < population_threshold:
            continue
        entry = city.CityEntry(record['name'], record['latitude'],
                               record['longitude'], record['population'])
        # Several cities can share one name; keep them all in a list.
        self.cities_info.setdefault(record['name'], []).append(entry)
def get_geo(countries0):
    """Return (countries, cities, cities_alt): sets of normalised place names.

    `countries0` seeds the country set; pycountry and geonamescache names
    (plus a few aliases and coarse region/direction words) are merged in.
    `cities_alt` additionally contains every normalised alternate city name.
    """
    gc = geonamescache.GeonamesCache()
    countries = countries0 + [
        country.name.lower() for country in pycountry.countries
    ]
    # Common aliases and coarse region tokens treated as country-level words.
    countries.extend(['usa', 'africa', 'asia', 'europe', 'america',
                      'north', 'south', 'west', 'east', 'city', 'area'])
    countries = set(countries)
    print(len(countries))
    # set.add is idempotent, so no membership pre-check is needed.
    for v in gc.get_countries().values():
        countries.add(simple_transform(v['name']))
    print(len(countries))
    for v in gc.get_us_states().values():
        countries.add(simple_transform(v['name']))
    print(len(countries))
    cities = {simple_transform(v['name']) for v in gc.get_cities().values()}
    print(len(cities))
    cities_alt = set()
    for v in gc.get_cities().values():
        cities_alt.add(simple_transform(v['name']))
        for alt in v['alternatenames']:
            # BUG FIX: the original computed simple_transform(e) but then
            # added the raw name `e`, so alternate names were never
            # normalised.  Add the transformed value instead.
            cities_alt.add(simple_transform(alt))
    print(len(cities_alt))
    return countries, cities, cities_alt
def generate_state_given_city(df, city_list, col_name):
    """Append a `col_name` column to `df` holding, for each city in
    `city_list`, a value pulled from the first geonamescache match
    (NaN when the city is unknown).

    NOTE(review): state[7] picks the 8th field of the city record by
    position — presumably an admin/state code; fragile, confirm against
    the geonamescache record layout.
    NOTE(review): the inner loop appends once per entry in info[0], so a
    geonameid mapping to several records would make len(state_list)
    drift from len(city_list) and the final column assignment would raise.
    """
    # import geonamescache
    gc = geonamescache.GeonamesCache()
    state_list = []
    for city in city_list:
        info = gc.get_cities_by_name(city)
        if info == []:
            state_list.append(np.nan)
        else:
            for dictionary in list(info[0].values()):
                state = list(dictionary.values())
                state_list.append(state[7])
    df[col_name] = state_list
    return df
def is_valid_country_fip(country_fip: str) -> bool:
    """Return True iff `country_fip` is a known 2-character country FIPS code.

    BUG FIX: the original called len() before checking the type, so a
    non-string, non-sized argument raised TypeError instead of returning
    False.  Validate the type first.
    """
    if not isinstance(country_fip, str):
        return False
    if len(country_fip) > 2:
        return False
    geocache = geonamescache.GeonamesCache()
    # Some records carry blank FIPS codes; exclude those before matching.
    valid_fips = {
        record["fips"] for record in geocache.get_countries().values()
        if record["fips"].strip()
    }
    return country_fip in valid_fips
def isCountry():  # Confirms a countries existance.
    """Prompt until the user enters a recognised country name; return it.

    The geonamescache lookup table is built once, outside the loop — the
    original rebuilt it on every iteration and also made a wasted first
    pass with an empty string before ever prompting.

    NOTE(review): capitalize() lowercases everything after the first
    letter, so multi-word countries ("United States") can never match —
    consider str.title() if that is unintended.
    """
    # Creates a geonamescache object (once, not per iteration)
    gc = geonamescache.GeonamesCache()
    # Searches geocache libary for country, returns a dictonary
    country_list = gc.get_countries_by_names()
    while True:
        country = input('| Please enter a country name: ')
        # Makes sure the first letter is uppper case
        country = country.capitalize()
        # If country name is found then it returns a country name.
        if country in country_list:
            return country
def __init__(self, show_result=True):
    """Assemble country/US-state metadata (population + continent) and
    download the raw CSV dataframes listed in data_urls.

    NOTE(review): relies on module-level names `normalized_names`,
    `continent_codes`, `data_urls`, `base_url`, `requests`, `pd`,
    `StringIO`, `display`, `Markdown` and a local "populations.csv" file —
    confirm they exist at file scope / on disk.
    """
    self.dataframes = {}
    gc = geonamescache.GeonamesCache()
    gc_data = list(gc.get_countries().values())
    # Treat US states as extra "countries", suffixed with ", US".
    gc_states = gc.get_us_states()
    for state in gc_states:
        state_data = gc_states[state]
        if not state_data["name"].endswith(", US"):
            state_data["name"] += ", US"
        gc_data += [state_data]
    self.country_metadata = {}
    # Population overrides come from a local CSV keyed by country name.
    populations = pd.read_csv("populations.csv", names=["country", "population"], index_col=0, header=0)
    for country in populations.index:
        if country in normalized_names:
            populations.loc[normalized_names[country]] = populations.loc[country]
    self.countries_to_plot = ["Brazil", "China", "Japan", "South Korea", "United States", "India", "Italy", "Germany", "Russia", "Netherlands", "Spain", "World"]
    for country_data in gc_data:
        name = country_data["name"]
        if name in normalized_names:
            name = normalized_names[name]
        population = populations.loc[name].population
        # States carry no continent code; default them to North America.
        if "continentcode" in country_data:
            continent = continent_codes[country_data["continentcode"]]
        else:
            continent = "North America"
        self.country_metadata[name] = {
            "population": population,
            "continent": continent
        }
    for metric in data_urls.keys():
        url = base_url + data_urls[metric]  # Combine URL parts
        r = requests.get(url)  # Retrieve from URL
        self.dataframes[metric] = pd.read_csv(StringIO(r.text), sep=",")  # Convert into Pandas dataframe
    if show_result:  # Display the first lines
        display(Markdown("### Raw confirmed cases data, per region/state"))
        with pd.option_context("display.max_rows", 10, "display.max_columns", 14):
            display(self.dataframes["confirmed"])
def update_city_list():
    """Regenerate cities/city_list.csv with name, admin1 region, country
    and coordinates for every city from read_cities()."""
    admin1 = read_admin1_codes()
    # Build the code->name table once; the original scanned the whole
    # admin1 list linearly for EVERY city.  setdefault keeps the FIRST
    # entry for a duplicated code, matching next(...)'s behaviour.
    admin1_names = {}
    for entry in admin1:
        admin1_names.setdefault(entry[0], entry[1])

    def admin1_name(code):
        return admin1_names.get(code) or ''

    gc = geonamescache.GeonamesCache()
    countries = gc.get_countries()  # hoisted: one fetch, not one per city
    cities = [[
        city['name'],
        admin1_name(city['country code'] + '.' + city['admin1 code']),
        countries[city['country code']]['name'],
        city['latitude'],
        city['longitude'],
    ] for city in read_cities()]
    with open('cities/city_list.csv', 'w') as f:
        writer = unicodecsv.writer(f, encoding='utf-8')
        writer.writerows(cities)
def all_cities():
    """Populate the module-level States_abbr, States and Cities dictionaries.

    geonamescache provides the world city list and the US state list; the
    lower-cased names are used as keys so later lookups can be
    case-insensitive.  'new york' is forced in explicitly at the end.
    """
    gc = geonamescache.GeonamesCache()
    # Hoisted: the original re-fetched the full state/city tables on every
    # loop iteration, doing the whole lookup per element.
    us_states = gc.get_us_states()
    for state in us_states:
        States_abbr[state.lower()] = state
        States[us_states[state]['name'].lower()] = state
    world_cities = gc.get_cities()
    for city_id in world_cities:
        name = world_cities[city_id]['name']
        Cities[name.lower()] = name
    Cities['new york'] = "New York"
def update_autocomplete_list():
    """Regenerate cities/autocomplete_list.csv as 'City, Region, Country'
    lines for every city from read_cities()."""
    import geonamescache
    admin1 = read_admin1_codes()

    def admin1_name(code):
        # First admin1 entry matching `code`, or '' when absent/blank.
        return next((a[1] for a in admin1 if a[0] == code), None) or ''

    gc = geonamescache.GeonamesCache()
    countries = gc.get_countries()  # hoisted: the original fetched this per city
    cities = [
        ', '.join([
            city['name'],
            admin1_name(city['country code'] + '.' + city['admin1 code']),
            countries[city['country code']]['name']
        ]) for city in read_cities()
    ]
    with codecs.open('cities/autocomplete_list.csv', 'w', 'utf-8') as f:
        f.write(u'\n'.join(cities))
def main():
    """Scrape worldometers coronavirus data, enrich it with geonamescache
    country metadata, and persist everything to the database.

    NOTE(review): depends on module-level helpers `get_main_data`,
    `get_historical_data`, `connect_to_db`, `get_db_cursor`,
    `save_countries_general`, `save_country_history`, `add_log` and the
    `config` module — none visible here; confirm their contracts.
    """
    print("Started!")
    gc = geonamescache.GeonamesCache()
    countries = gc.get_countries_by_names()
    main_data = get_main_data()
    country_history = {}
    for country in main_data:
        # Explicitly skipped countries are dropped entirely.
        if country in config.skip_countries:
            continue
        # Per-country history page, when a detail link exists.
        if "link" in main_data[country]:
            country_history[country] = get_historical_data(
                "https://www.worldometers.info/coronavirus/" + main_data[country]["link"])
        else:
            country_history[country] = {}
        # Normalise the scraped name before the metadata lookup.
        if country in config.countries_not_found_match:
            new_cname = config.countries_not_found_match[country]
        else:
            new_cname = country
        # Metadata priority: manual overrides first, then geonamescache.
        if new_cname in config.extra_set:
            country_history[country]["more_info"] = config.extra_set[new_cname]
            main_data[country]["more_info"] = config.extra_set[new_cname]
        elif new_cname in countries:
            country_history[country]["more_info"] = countries[new_cname]
            main_data[country]["more_info"] = countries[new_cname]
        else:
            print("not found", country)  # I should report this to a file!!!
    connector = connect_to_db()
    db = get_db_cursor(connector)
    save_countries_general(db, main_data)
    for country in country_history:
        save_country_history(db, country, country_history[country])
    add_log(db, "robot", "Robot Status: Ok")
    connector.close()
    print("Done!")
def get_country_of_origin(self) -> Union[str, None]:
    """
    • Returns the country of origin of the applicant. Currently just checks
    the document for a country that is NOT the United States.
    """
    gc = geonamescache.GeonamesCache()
    country_names: Iterator[str] = gc.get_countries_by_names().keys()
    match_country: Callable[[str, float], Union[str, None]]
    match_country = similar_in_list(country_names)
    # Scan every GPE entity and return the first fuzzy country match
    # (similarity >= 0.8) that is not the United States.
    for entity in self.get_ents(['GPE']):
        candidate = match_country(entity.text, 0.8)
        if candidate and candidate != "United States":
            return candidate
def __init__(self, nlp_model):
    """Set up the geocoder, spaCy pipeline and the hard-coded lists of
    African countries (with spelling variants) used for entity matching.

    NOTE(review): "Sierra Lone" appears in both lists — likely a typo for
    "Sierra Leone", but left untouched since the two lists must agree.
    """
    self.gc = geonamescache.GeonamesCache()
    self.all_countries = self.gc.get_countries_by_names()
    self.nlp = spacy.load(nlp_model)
    self.geolocator = Nominatim(user_agent="MenViz")
    #Global variable end_early is to ensure correct identification for AR's of each location
    self.end_early = False
    # Canonical country names, indexed 0..24 in parallel with
    # extensive_country_list below.
    self.country_list = [
        'Benin', 'Burkina Faso', 'Burundi', 'Cameroun', 'Centrafrique',
        "Côte d'Ivoire", 'Ethiopia', 'Ghana', 'Guinea', 'Guinea Bissau',
        'Gambia', 'Kenya', 'Mali', 'Mauritania', 'Niger', 'Nigeria',
        'Congo', 'Senegal', 'South Sudan', 'Sudan', 'Sierra Lone',
        'Tanzania', 'Chad', 'Togo', 'Uganda'
    ]
    # Index -> list of accepted spellings (French/English variants).
    # NOTE(review): entry 22 repeats "Tchad" twice — harmless but
    # presumably unintentional.
    self.extensive_country_list = {
        0: ["Benin", "Bénin"],
        1: ["Burkina Faso", "Burkina"],
        2: ["Burundi"],
        3: ["Cameroun", "Cameroon"],
        4: ["Centrafrique", "Central Africa", "Central African Republuc"],
        5: ["Côte d'Ivoire", "Ivory Coast"],
        6: ["Ethiopia"],
        7: ["Ghana"],
        8: ["Guinea", "Guinée"],
        9: ["Guinea Bissau", "Guinée Bissau"],
        10: ["Gambia"],
        11: ["Kenya"],
        12: ["Mali"],
        13: ["Mauritania", "Mauritanie"],
        14: ["Niger"],
        15: ["Nigeria"],
        16: ["Democratic Republic of Congo", "Congo", "Dem. Rep. Congo"],
        17: ["Senegal", "Sénégal"],
        18: ["South Sudan"],
        19: ["Sudan"],
        20: ["Sierra Lone"],
        21: ["Tanzania"],
        22: ["Chad", "Tchad", "Tchad"],
        23: ["Togo"],
        24: ["Uganda"]
    }
    #problematic cases to discard after obtaininig named entities
    self.problematic_cases = ["Upper", "Upper East", "Upper West", "East"]
def isCity(country):
    """Prompt until the user enters a recognised city name, then return a
    Location(country, city_name, country_code) built from the first match.

    Fixes over the original: the GeonamesCache is created once instead of
    per iteration; the wasted first pass with an empty string is removed;
    the `city` variable is no longer reused as both the query string and
    the loop target.
    """
    # Creates a geonamescache object (once, not per iteration)
    gc = geonamescache.GeonamesCache()
    while True:
        city_name = input('| Please enter a city name: ')
        # Makes sure the first letter is uppper case
        city_name = city_name.capitalize()
        # Searches geocache libary for city name, returns a dictonary
        city_list = gc.get_cities_by_name('%s' % city_name)
        # Each match is a {geonameid: record} dict; take the first match,
        # exactly as the original for-loop did.
        for match in city_list:
            geonameid = list(match.keys())[0]
            record = match[geonameid]
            return Location(country, record['name'], record['countrycode'])
def __init__(self):
    """Load per-country metadata from geonamescache, download the raw CSV
    dataframes listed in data_urls, and display a preview of the
    confirmed-cases frame.

    NOTE(review): relies on module-level names `continent_codes`,
    `data_urls`, `base_url`, `requests`, `pd`, `StringIO`, `display`,
    `Markdown` — confirm they are defined/imported at file scope.
    """
    self.dataframes = {}
    gc = geonamescache.GeonamesCache()
    gc_data = gc.get_countries()
    self.country_metadata = {}
    # Map geonamescache spellings onto the names used by the CSV source.
    normalized_names = {
        "Timor Leste": "East Timor",
        "Vatican": "Vatican City",
        "Democratic Republic of the Congo": "Congo (Kinshasa)",
        "Republic of the Congo": "Congo (Brazzaville)",
        "Cabo Verde": "Cape Verde"
    }
    for country_code in gc_data:
        metadata = gc_data[country_code]
        name = metadata["name"]
        if name in normalized_names:
            name = normalized_names[name]
        population = metadata["population"]
        area = metadata["areakm2"]
        continent = continent_codes[metadata["continentcode"]]
        self.country_metadata[name] = {
            "population": population,
            "area": area,
            "continent": continent
        }
    for metric in data_urls.keys():
        url = base_url + data_urls[metric]  # Combine URL parts
        r = requests.get(url)  # Retrieve from URL
        self.dataframes[metric] = pd.read_csv(StringIO(
            r.text), sep=",")  # Convert into Pandas dataframe
    # Display the first lines
    display(Markdown("### Raw confirmed cases data, per region/state"))
    with pd.option_context("display.max_rows", 10, "display.max_columns", 14):
        display(self.dataframes["confirmed"])
def set_event_location(self, ent, event): locationDict = self.get_frequency(self.get_entity('LOCATION', ent)) #print locationDict country = self.find_event_country(locationDict) gc = geonamescache.GeonamesCache() # If we find a country, find a city if country != '': for place,v in locationDict: if place != country: if self.is_city_in_country(place, country): event.city = place break event.country = country # If find no country, find a city and set the country else: for place,v in locationDict: if self.is_city_valid(place): event.city = place break if event.country == 'United States' or event.country == '': state = self.find_event_state(locationDict) if state != '': event.state = state event.country = 'United States'
'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    'United States': 'US'
}
# Reverse lookup: abbreviation -> full state name.
abb_name_dic = dict(map(reversed, state_name_abb_dic.items()))
# todo: can I do this with the first of these lines? I don't know what genomaescache has. Given I import it, I might as well use it if I can.
# FIPS code -> place name: county FIPS from geonamescache merged with
# state-level FIPS from fips_abb_dic (Puerto Rico excluded).
# NOTE(review): the comprehension variable shadows the builtin `dict`.
all_fips_name_dic = {
    **{
        dict['fips']: dict['name']
        for dict in geonamescache.GeonamesCache().get_us_counties()
    },
    **{k: abb_name_dic[v] for k, v in fips_abb_dic.items() if v != 'PR'}
}
# Reverse lookup: place name -> FIPS code.
all_name_fips_dic = dict(map(reversed, all_fips_name_dic.items()))
###################
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))


def filenamer(path):
    # Resolve `path` relative to this module's directory.
    return os.path.join(ROOT_DIR, path)