def create_country_finder():
    """Build a ``LocationFinder`` that recognises country names.

    Every country name reported by geonamescache is passed through
    ``unidecode.unidecode``; the resulting list is handed to
    ``_compile_regex`` and the compiled result is wrapped in a
    ``LocationFinder``.
    """
    countries = geonamescache.GeonamesCache().get_countries()
    ascii_names = []
    for record in countries.values():
        ascii_names.append(unidecode.unidecode(record["name"]))
    return LocationFinder(_compile_regex(ascii_names))
def cityDic():
    """Index large Austrian, German, British and French cities by name.

    A city qualifies when its country code is AT, DE, GB or FR and its
    population exceeds 150000.  Each qualifying city is registered under its
    lower-cased name and, when the Italian translation differs from the
    original name, under the lower-cased translation as well.

    The number of qualifying cities and the final list length (twice) are
    printed.

    Returns:
        tuple: ``(citiyDic, cityList)`` -- a dict mapping every registered
        name to its geonamescache record, and the list of registered names
        with 'livorno' removed and 'barcellona' appended.  Only the list is
        adjusted this way; the dict is returned as built.
    """
    records = geonamescache.GeonamesCache().get_cities()
    wanted_countries = ['AT', 'DE', 'GB', 'FR']
    record_by_name = {}
    names = []
    matched = 0

    for record in records.values():
        if record['countrycode'] not in wanted_countries:
            continue
        if not record['population'] > 150000:
            continue

        original = record['name']
        names.append(original.lower())
        record_by_name[original.lower()] = record

        # Register the Italian spelling too when it is a different string.
        italian = Translator(to_lang="it").translate(original)
        if italian != original:
            names.append(italian.lower())
            record_by_name[italian.lower()] = record
        matched += 1

    excluded = ['livorno']
    names = [name for name in names if name not in excluded]
    names.append('barcellona')

    print(matched)
    print(len(names))
    print(len(names))
    return record_by_name, names
    def __init__(self):
        """Collect per-country metadata and download one dataframe per metric.

        ``self.country_metadata`` maps a country name to its population,
        area and continent as reported by geonamescache; a handful of names
        are replaced by the spellings listed in ``renamed`` below.
        ``self.dataframes`` maps every key of ``data_urls`` to the CSV fetched
        from ``base_url`` plus that entry's path.
        """
        self.dataframes = {}
        countries = geonamescache.GeonamesCache().get_countries()
        self.country_metadata = {}

        # geonamescache spelling -> spelling used as key of country_metadata
        renamed = {
            "Timor Leste": "East Timor",
            "Vatican": "Vatican City",
            "Democratic Republic of the Congo": "Congo (Kinshasa)",
            "Republic of the Congo": "Congo (Brazzaville)",
            "Cabo Verde": "Cape Verde",
        }

        for metadata in countries.values():
            name = renamed.get(metadata["name"], metadata["name"])
            population = metadata["population"]
            area = metadata["areakm2"]
            continent = continent_codes[metadata["continentcode"]]
            self.country_metadata[name] = dict(
                population=population, area=area, continent=continent)

        for metric, path in data_urls.items():
            response = requests.get(base_url + path)
            # Parse the downloaded text as comma-separated values.
            self.dataframes[metric] = pd.read_csv(
                StringIO(response.text), sep=",")
Exemple #4
0
def cityDic():
    """Index Italian cities with more than 20000 inhabitants by name.

    Each city is registered under its lower-cased name.  When the first
    alternate name is non-empty, every alternate name that passes
    ``only_roman_chars`` and ``notAllUpper`` and is longer than three
    characters is registered (lower-cased) as well.

    The length of the final name list is printed.

    Returns:
        tuple: ``(citiyDic, cityList)`` -- a dict mapping every registered
        name to its geonamescache record, and the de-duplicated list of
        names with a few ambiguous words removed.  Only the list is
        filtered; the dict is returned as built.
    """
    records = geonamescache.GeonamesCache().get_cities()
    record_by_name = {}
    names = []

    for record in records.values():
        if record['countrycode'] != 'IT' or not record['population'] > 20000:
            continue

        if len(record['alternatenames'][0]) != 0:
            accepted = [record['name'].lower()]
            for alternate in record['alternatenames']:
                if (only_roman_chars(alternate) and notAllUpper(alternate)
                        and len(alternate) > 3):
                    accepted.append(alternate.lower())
            names.extend(accepted)
            for accepted_name in accepted:
                record_by_name[accepted_name] = record
        else:
            lowered = record['name'].lower()
            names.append(lowered)
            record_by_name[lowered] = record

    excluded = ['regio', 'marino', 'come', 'bra', 'ramma']
    names = list({name for name in names if name not in excluded})
    print(len(names))
    return record_by_name, names
Exemple #5
0
 def is_city_in_country(self, city_name, country):
     """Return True when a city called *city_name* lies in *country*.

     Args:
         city_name: name passed to geonamescache's ``get_cities_by_name``.
         country: country name, compared with the ``name`` attribute of the
             pycountry entry for each matching city's country code.

     Returns:
         bool: True as soon as one matching city belongs to *country*,
         otherwise False.
     """
     gc = geonamescache.GeonamesCache()
     for match in gc.get_cities_by_name(city_name):
         # Only the city records are needed, not their keys.  ``values()``
         # works on Python 2 and 3, whereas ``iteritems()`` is Python 2 only.
         for record in match.values():
             # NOTE(review): newer pycountry releases appear to spell this
             # keyword ``alpha_2`` -- confirm against the pinned version.
             entry = pycountry.countries.get(alpha2=record.get('countrycode'))
             if country == entry.name:
                 return True
     return False
Exemple #6
0
 def __init__(self):
     """Index every geonamescache city by the lower-cased names it goes by.

     ``self.cities`` becomes a list of ``(names, city)`` tuples, where
     ``names`` is the set holding the lower-cased primary name and all
     lower-cased alternate names of ``city``.
     """
     self.cities = []
     for record in geonamescache.GeonamesCache().get_cities().values():
         lowered = {record['name'].lower()}
         for alternate in record['alternatenames']:
             lowered.add(alternate.lower())
         self.cities.append((lowered, record))
Exemple #7
0
    def process_item(self, item, spider):
        """Attach country/city locations found in ``item['main_text']``.

        GPE entities reported by the spaCy ``en_core_web_sm`` model are
        matched against geonamescache: an entity is recorded as a country the
        first time its text equals a country name, ISO code, dotted ISO code
        (e.g. ``U.S``) or ISO3 code; otherwise, when it equals a city name, one
        ``{city: countrycode}`` entry per same-named city is collected.

        For each recorded country, one ``LocationItem`` is produced per
        collected city whose country code is that country's key; a country
        without any entry in the result gets ``{"country": name,
        "location": ""}``.  Everything is appended to
        ``item['reports'][0]['locations']``.

        Args:
            item: mapping with ``main_text`` and ``reports`` entries.
            spider: unused here.

        Returns:
            The same *item*, updated in place.
        """
        cache = geonamescache.GeonamesCache()

        # Every accepted spelling of a country -> key of the country record.
        alias_to_key = {}
        for country_key, record in cache.get_countries().items():
            for field, value in record.items():
                if field == "name":
                    alias_to_key[value] = country_key
                elif field == "iso":
                    alias_to_key[value] = country_key
                    # Dotted form of the code: letters joined by ".".
                    alias_to_key[".".join(value)] = country_key
                elif field == "iso3":
                    alias_to_key[value] = country_key

        # Parallel views of the city table: plain names for membership tests
        # and one single-entry {name: countrycode} dict per city.
        city_names = []
        city_to_code = []
        for city_record in cache.get_cities().values():
            city_names.append(city_record['name'])
            city_to_code.append(
                {city_record['name']: city_record['countrycode']})

        doc = spacy.load("en_core_web_sm")(item['main_text'])

        mentioned_countries = []
        matched_cities = []
        seen_country_keys = {}
        for ent in doc.ents:
            if ent.label_ != 'GPE':
                continue
            text = ent.text
            country_key = alias_to_key.get(text)
            first_sighting = (country_key is not None
                              and seen_country_keys.get(country_key) is None)
            if first_sighting:
                seen_country_keys[country_key] = (
                    seen_country_keys.get(country_key, 0) + 1)
                mentioned_countries.append(text)
            elif text in city_names:
                matched_cities.extend(
                    pair for pair in city_to_code if text in pair)

        result = []
        for country_name in mentioned_countries:
            country_key = alias_to_key[country_name]
            for pair in matched_cities:
                if country_key in pair.values():
                    located = LocationItem()
                    located["country"] = country_name
                    located["location"] = next(iter(pair))
                    result.append(located)
            # A country with no entry in the result is still reported.
            if all(entry["country"] != country_name for entry in result):
                result.append({"country": country_name, "location": ""})

        for entry in result:
            item['reports'][0]['locations'].append(entry)
        return item
Exemple #8
0
 async def city_answer(ctx, answer):
     """Record *answer* for the channel's game when it names a known city.

     When geonamescache has no city called *answer*, a message mentioning
     the author is sent to the context instead and nothing is recorded.
     """
     matches = geo.GeonamesCache().get_cities_by_name(answer)
     if not matches:
         await ctx.send(
             f"**{ctx.author.mention}: you gave the wrong city name**")
         return
     current_games[ctx.channel].answer = answer
Exemple #9
0
 def __init__(self, mag_affilations, max_worker=2):
     """Store the affiliations and prepare city/country name lookups.

     Args:
         mag_affilations: kept as ``self.aff``.
         max_worker: kept as ``self._max_workers``.
     """
     self.aff = mag_affilations
     self.gc = geonamescache.GeonamesCache()
     city_records = self.gc.get_cities().values()
     country_records = self.gc.get_countries().values()
     # Sets of the plain names known to geonamescache.
     self.cities = {record['name'] for record in city_records}
     self.countries = {record['name'] for record in country_records}
     self._max_workers = max_worker
     self.mapper = country(from_key='iso', to_key='name')
def getLocation(userCommand):
    """Return the first country or city mentioned in *userCommand*.

    Countries (from pycountry) are checked first; a country matches when its
    name, lower-cased or passed through ``str.capitalize``, occurs anywhere
    in the command.  Cities (from geonamescache) are then compared with the
    whitespace-separated words of the command, so a city name containing a
    space cannot match.

    Args:
        userCommand (str): the text to search.

    Returns:
        str: the pycountry name of the matched country, the capitalized name
        of the matched city, or ``""`` when nothing matches.
    """
    for country in pycountry.countries:
        name = country.name
        if name.lower() in userCommand or name.capitalize() in userCommand:
            return name

    # Split once up front instead of twice per city record.
    words = set(userCommand.split())
    for record in geonamescache.GeonamesCache().get_cities().values():
        name = record['name']
        if name.lower() in words or name.capitalize() in words:
            return name.capitalize()
    return ""
def is_valid_us_county_fip(county_fip: str):
    """Return True when *county_fip* is the FIPS code of a known US county.

    The argument is validated before any data is loaded: values that are not
    strings, and strings longer than five characters, are rejected with
    False instead of raising.

    Args:
        county_fip: candidate FIPS code.

    Returns:
        bool: True when a county reported by geonamescache carries exactly
        this ``fips`` value.
    """
    # Type first: len() of a non-string such as an int would raise TypeError.
    if not isinstance(county_fip, str) or len(county_fip) > 5:
        return False
    counties = geonamescache.GeonamesCache().get_us_counties()
    return any(county["fips"] == county_fip for county in counties)
Exemple #12
0
def get_population(Country_ISO):
    """Return the population geonamescache reports for *Country_ISO*.

    Args:
        Country_ISO: key of the country in geonamescache's country table.

    Returns:
        The country's ``population`` entry, or the fixed fallback
        ``8 * 10 ** 7`` when the code is not in the table.
    """
    countries = geonamescache.GeonamesCache().get_countries()
    country = countries.get(Country_ISO)
    if country is None:
        # Unknown code: fall back to a fixed value rather than crash.
        return 8 * 10 ** 7
    return country.get('population')
def is_valid_city_name(city: str):
    """Return True when *city* is the name of a city known to geonamescache.

    The argument is validated before any data is loaded: values that are not
    strings, and strings longer than 54 characters, are rejected with False
    instead of raising.

    Args:
        city: candidate city name; compared case-sensitively.

    Returns:
        bool: True when some city record has exactly this ``name``.
    """
    # Type first: len() of a non-string such as an int would raise TypeError.
    if not isinstance(city, str) or len(city) > 54:
        return False
    cities = geonamescache.GeonamesCache().get_cities()
    return any(record["name"] == city for record in cities.values())
    def make_geographies_list():
        """Return one flat list of US place names to exclude from short names.

        The list holds, in this order: names of US cities with a population
        above 150000, names of all US counties, and names of all US states,
        as reported by geonamescache.
        """
        cache = geonamescache.GeonamesCache()

        big_us_cities = []
        for record in cache.get_cities().values():
            if record[u"countrycode"] == u"US" and record[u"population"] > 150000:
                big_us_cities.append(record[u"name"])

        county_names = [county[u"name"] for county in cache.get_us_counties()]
        state_names = [
            state[u"name"] for state in cache.get_us_states().values()
        ]
        return big_us_cities + county_names + state_names
Exemple #15
0
 def create_cities_dictionary(self, population_threshold: int = 78000):
     """Group sufficiently populous cities by name in ``self.cities_info``.

     Every geonamescache city whose population is at least
     *population_threshold* is turned into a ``city.CityEntry`` (name,
     latitude, longitude, population) and appended to the list stored under
     its name, creating that list on first use.
     """
     cache = geonamescache.GeonamesCache()
     for record in cache.get_cities().values():
         if not record['population'] >= population_threshold:
             continue
         name = record['name']
         if name in self.cities_info:
             self.cities_info[name].append(
                 city.CityEntry(name, record['latitude'],
                                record['longitude'], record['population']))
         else:
             self.cities_info[name] = [
                 city.CityEntry(name, record['latitude'],
                                record['longitude'], record['population'])
             ]
def get_geo(countries0):
    """Build sets of country-like words, city names and city aliases.

    The size of the country set is printed after each of its three growth
    steps, followed by the sizes of the two city sets.

    Args:
        countries0: iterable of extra country-like words to start from.

    Returns:
        tuple: ``(countries, cities, cities_alt)`` where

        * ``countries`` holds *countries0*, the lower-cased pycountry names,
          a fixed list of generic geographic words, and the
          ``simple_transform``-ed geonamescache country and US state names;
        * ``cities`` holds the transformed geonamescache city names;
        * ``cities_alt`` holds ``cities`` plus the transformed alternate
          names of every city.
    """
    gc = geonamescache.GeonamesCache()

    countries = set(countries0)
    countries.update(country.name.lower() for country in pycountry.countries)
    countries.update((
        'usa', 'africa', 'asia', 'europe', 'america',
        'north', 'south', 'west', 'east', 'city', 'area',
    ))
    print(len(countries))

    countries.update(
        simple_transform(record['name'])
        for record in gc.get_countries().values())
    print(len(countries))

    countries.update(
        simple_transform(record['name'])
        for record in gc.get_us_states().values())
    print(len(countries))

    city_records = gc.get_cities().values()
    cities = {simple_transform(record['name']) for record in city_records}
    print(len(cities))

    # Start from the primary names, then add every alternate name.  The
    # transformed alias is stored: the previous code computed it but added
    # the raw alias, leaving this set inconsistent with ``cities``.
    cities_alt = set(cities)
    for record in city_records:
        cities_alt.update(
            simple_transform(alias) for alias in record['alternatenames'])
    print(len(cities_alt))

    return countries, cities, cities_alt
Exemple #17
0
def generate_state_given_city(df, city_list, col_name):
    """Add a column to *df* derived from the city names in *city_list*.

    For each name, geonamescache is asked for the cities with that name.
    When there are none, NaN is recorded; otherwise only the first match is
    consulted and the eighth field (by position) of its record is recorded.

    Args:
        df: object supporting ``df[col_name] = list`` assignment.
        city_list: iterable of city names.
        col_name: name of the column to write.

    Returns:
        The same *df*, with ``df[col_name]`` assigned.
    """
    cache = geonamescache.GeonamesCache()
    states = []

    for city_name in city_list:
        matches = cache.get_cities_by_name(city_name)
        if matches == []:
            states.append(np.nan)
            continue
        for record in matches[0].values():
            # NOTE(review): index 7 is presumably the ``admin1code`` field;
            # this relies on the key order of the record -- confirm.
            fields = list(record.values())
            states.append(fields[7])

    df[col_name] = states
    return df
def is_valid_country_fip(country_fip: str):
    """Return True when *country_fip* is the FIPS code of a known country.

    The argument is validated before any data is loaded: values that are not
    strings, and strings longer than two characters, are rejected with False
    instead of raising.  Blank ``fips`` entries in the country table are
    ignored, so an empty or whitespace-only code never validates.

    Args:
        country_fip: candidate FIPS code.

    Returns:
        bool: True when a geonamescache country carries exactly this
        non-blank ``fips`` value.
    """
    # Type first: len() of a non-string such as an int would raise TypeError.
    if not isinstance(country_fip, str) or len(country_fip) > 2:
        return False
    countries = geonamescache.GeonamesCache().get_countries()
    known_fips = {
        record["fips"] for record in countries.values()
        if record["fips"].strip()
    }
    return country_fip in known_fips
Exemple #19
0
def isCountry():
    """Prompt until the user enters a country known to geonamescache.

    Matching ignores letter case and surrounding whitespace, so multi-word
    names such as "united states" are accepted.  (Normalising the input
    with ``str.capitalize`` alone would lower-case every letter after the
    first and could never match such names.)

    Returns:
        str: the country name as spelled by geonamescache.
    """
    # The country table does not change between prompts; load it once.
    known = geonamescache.GeonamesCache().get_countries_by_names()
    canonical = {name.casefold(): name for name in known}

    while True:
        entered = input('| Please enter a country name: ').strip()
        match = canonical.get(entered.casefold())
        if match is not None:
            return match
Exemple #20
0
    def __init__(self, show_result=True):
        """Collect country/state metadata and download one dataframe per metric.

        Countries and US states come from geonamescache (state names get a
        ", US" suffix), populations from ``populations.csv``.
        ``self.country_metadata`` maps each name to its population and
        continent; records without a ``continentcode`` are filed under
        "North America".  ``self.dataframes`` maps every key of ``data_urls``
        to the CSV fetched from ``base_url`` plus that entry's path.

        Args:
            show_result: when true, display the "confirmed" dataframe.
        """
        self.dataframes = {}
        cache = geonamescache.GeonamesCache()
        records = list(cache.get_countries().values())

        for state_record in cache.get_us_states().values():
            # The state record itself is renamed; the guard keeps the suffix
            # from being added twice.
            if not state_record["name"].endswith(", US"):
                state_record["name"] += ", US"
            records.append(state_record)

        self.country_metadata = {}
        populations = pd.read_csv(
            "populations.csv",
            names=["country", "population"],
            index_col=0,
            header=0,
        )
        # Make rows reachable under their normalized spelling as well.
        for label in populations.index:
            if label in normalized_names:
                populations.loc[normalized_names[label]] = populations.loc[label]

        self.countries_to_plot = [
            "Brazil", "China", "Japan", "South Korea", "United States",
            "India", "Italy", "Germany", "Russia", "Netherlands", "Spain",
            "World",
        ]

        for record in records:
            raw_name = record["name"]
            name = normalized_names[raw_name] if raw_name in normalized_names else raw_name
            population = populations.loc[name].population
            continent = (continent_codes[record["continentcode"]]
                         if "continentcode" in record else "North America")
            self.country_metadata[name] = dict(
                population=population, continent=continent)

        for metric, path in data_urls.items():
            response = requests.get(base_url + path)
            # Parse the downloaded text as comma-separated values.
            self.dataframes[metric] = pd.read_csv(
                StringIO(response.text), sep=",")

        if not show_result:
            return
        # Show a truncated view of the raw "confirmed" table.
        display(Markdown("### Raw confirmed cases data, per region/state"))
        with pd.option_context("display.max_rows", 10, "display.max_columns", 14):
            display(self.dataframes["confirmed"])
Exemple #21
0
def update_city_list():
    """Write ``cities/city_list.csv`` with one row per city of ``read_cities()``.

    Each row holds the city name, the admin1 name looked up under
    ``"<country code>.<admin1 code>"`` ('' when unknown), the country name
    from geonamescache, the latitude and the longitude.
    """
    # Index the admin1 table once instead of scanning it for every city.
    # setdefault keeps the first entry for a duplicated code, which is what
    # a front-to-back scan would find.
    admin1_names = {}
    for row in read_admin1_codes():
        admin1_names.setdefault(row[0], row[1])

    countries = geonamescache.GeonamesCache().get_countries()
    cities = [[
        city['name'],
        admin1_names.get(
            city['country code'] + '.' + city['admin1 code']) or '',
        countries[city['country code']]['name'],
        city['latitude'],
        city['longitude'],
    ] for city in read_cities()]

    # NOTE(review): unicodecsv emits encoded bytes; on Python 3 the file
    # presumably has to be opened in binary mode ('wb') -- confirm.
    with open('cities/city_list.csv', 'w') as f:
        writer = unicodecsv.writer(f, encoding='utf-8')
        writer.writerows(cities)
def all_cities():
    """Fill the module-level ``States_abbr``, ``States`` and ``Cities`` tables.

    geonamescache provides a list of the world's cities and a list of the
    states of the United States.  After this call:

    * ``States_abbr`` maps each lower-cased state key to the key itself;
    * ``States`` maps each lower-cased state name to its state key;
    * ``Cities`` maps each lower-cased city name to the city name, with
      ``'new york'`` always mapped to ``"New York"``.
    """
    cache = geonamescache.GeonamesCache()
    for state_key, state in cache.get_us_states().items():
        States_abbr[state_key.lower()] = state_key
        States[state['name'].lower()] = state_key
    for record in cache.get_cities().values():
        city_name = record['name']
        Cities[city_name.lower()] = city_name
    Cities['new york'] = "New York"
def update_autocomplete_list():
    """Write ``cities/autocomplete_list.csv`` with one line per city.

    Each line is ``"<city>, <admin1 name>, <country name>"`` for a city of
    ``read_cities()``; the admin1 name is looked up under
    ``"<country code>.<admin1 code>"`` and is '' when unknown.
    """
    import geonamescache

    # Index the admin1 table once instead of scanning it for every city.
    # setdefault keeps the first entry for a duplicated code, which is what
    # a front-to-back scan would find.
    admin1_names = {}
    for row in read_admin1_codes():
        admin1_names.setdefault(row[0], row[1])

    countries = geonamescache.GeonamesCache().get_countries()
    cities = [
        ', '.join([
            city['name'],
            admin1_names.get(
                city['country code'] + '.' + city['admin1 code']) or '',
            countries[city['country code']]['name'],
        ]) for city in read_cities()
    ]
    with codecs.open('cities/autocomplete_list.csv', 'w', 'utf-8') as f:
        f.write(u'\n'.join(cities))
Exemple #24
0
def main():
    """Collect per-country data, enrich it and store it in the database.

    For every country of ``get_main_data()`` not listed in
    ``config.skip_countries``, the historical data is fetched when a link is
    present, and a ``more_info`` entry is attached from ``config.extra_set``
    or, failing that, from geonamescache (after renaming the country through
    ``config.countries_not_found_match``).  Countries found in neither are
    reported on stdout.  Everything is then saved and a status line logged.

    The database connection is closed even when saving fails.
    """
    print("Started!")

    countries = geonamescache.GeonamesCache().get_countries_by_names()
    main_data = get_main_data()
    country_history = {}

    for country in main_data:
        if country in config.skip_countries:
            continue

        if "link" in main_data[country]:
            country_history[country] = get_historical_data(
                "https://www.worldometers.info/coronavirus/" +
                main_data[country]["link"])
        else:
            country_history[country] = {}

        # Some scraped names differ from the spelling used by the lookups.
        if country in config.countries_not_found_match:
            new_cname = config.countries_not_found_match[country]
        else:
            new_cname = country

        if new_cname in config.extra_set:
            more_info = config.extra_set[new_cname]
        elif new_cname in countries:
            more_info = countries[new_cname]
        else:
            print("not found", country)  # I should report this to a file!!!
            continue
        country_history[country]["more_info"] = more_info
        main_data[country]["more_info"] = more_info

    connector = connect_to_db()
    try:
        db = get_db_cursor(connector)
        save_countries_general(db, main_data)
        for country, history in country_history.items():
            save_country_history(db, country, history)
        add_log(db, "robot", "Robot Status: Ok")
    finally:
        # Release the connection whether or not the saves succeeded.
        connector.close()
    print("Done!")
    def get_country_of_origin(self) -> Union[str, None]:
        """Return the applicant's country of origin, if one can be found.

        Each GPE entity of the document is compared, through the matcher
        returned by ``similar_in_list``, with the country names known to
        geonamescache using a threshold of 0.8.  The first result that is
        not "United States" is returned; None when there is no such result.
        """
        known_countries: Iterator[str] = (
            geonamescache.GeonamesCache().get_countries_by_names().keys())
        gpe_texts: Iterator[str] = (
            ent.text for ent in self.get_ents(['GPE']))
        closest_country: Callable[[str, float], Union[str, None]] = (
            similar_in_list(known_countries))

        for text in gpe_texts:
            candidate: Union[str, None] = closest_country(text, 0.8)
            if candidate and candidate != "United States":
                return candidate
        return None
Exemple #26
0
 def __init__(self, nlp_model):
     """Load the lookup data, NLP model and geocoder used by this object.

     Args:
         nlp_model: name or path handed to ``spacy.load``.
     """
     self.gc = geonamescache.GeonamesCache()
     self.all_countries = self.gc.get_countries_by_names()
     self.nlp = spacy.load(nlp_model)
     self.geolocator = Nominatim(user_agent="MenViz")
     # Flag used to ensure correct identification of the AR of each location.
     self.end_early = False
     # NOTE(review): "Sierra Lone" and "Central African Republuc" below look
     # like misspellings; they are runtime data and are kept as they are --
     # confirm against the texts being matched.
     self.country_list = [
         "Benin", "Burkina Faso", "Burundi", "Cameroun", "Centrafrique",
         "Côte d'Ivoire", "Ethiopia", "Ghana", "Guinea", "Guinea Bissau",
         "Gambia", "Kenya", "Mali", "Mauritania", "Niger", "Nigeria",
         "Congo", "Senegal", "South Sudan", "Sudan", "Sierra Lone",
         "Tanzania", "Chad", "Togo", "Uganda",
     ]
     # Accepted spellings per country, keyed by position (0, 1, 2, ...).
     self.extensive_country_list = dict(enumerate([
         ["Benin", "Bénin"],
         ["Burkina Faso", "Burkina"],
         ["Burundi"],
         ["Cameroun", "Cameroon"],
         ["Centrafrique", "Central Africa", "Central African Republuc"],
         ["Côte d'Ivoire", "Ivory Coast"],
         ["Ethiopia"],
         ["Ghana"],
         ["Guinea", "Guinée"],
         ["Guinea Bissau", "Guinée Bissau"],
         ["Gambia"],
         ["Kenya"],
         ["Mali"],
         ["Mauritania", "Mauritanie"],
         ["Niger"],
         ["Nigeria"],
         ["Democratic Republic of Congo", "Congo", "Dem. Rep. Congo"],
         ["Senegal", "Sénégal"],
         ["South Sudan"],
         ["Sudan"],
         ["Sierra Lone"],
         ["Tanzania"],
         ["Chad", "Tchad", "Tchad"],
         ["Togo"],
         ["Uganda"],
     ]))
     # Problematic cases to discard after obtaining the named entities.
     self.problematic_cases = ["Upper", "Upper East", "Upper West", "East"]
Exemple #27
0
def isCity(country):
    """Prompt until the user names a city known to geonamescache.

    The entered text is looked up in three spellings, in this order:
    ``str.capitalize`` form, exactly as typed, and ``str.title`` form.  The
    first spelling with at least one match wins, and the first matching
    city is used.  Trying more than the capitalized form lets multi-word
    names such as "new york" be found.

    Args:
        country: passed unchanged as the first argument of ``Location``.

    Returns:
        Location: built from *country*, the matched city's name and its
        country code.
    """
    # One cache for all prompts instead of a new one per loop iteration.
    gc = geonamescache.GeonamesCache()

    while True:
        entered = input('| Please enter a city name: ')
        for spelling in (entered.capitalize(), entered, entered.title()):
            matches = gc.get_cities_by_name(spelling)
            if not matches:
                continue
            # Take the city record stored in the first match.
            record = next(iter(matches[0].values()))
            return Location(country, record['name'], record['countrycode'])
Exemple #28
0
    def __init__(self):
        """Collect country metadata, download the metrics and show a preview.

        ``self.country_metadata`` maps a country name to its population,
        area and continent as reported by geonamescache; a handful of names
        are replaced by the spellings listed in ``renamed`` below.
        ``self.dataframes`` maps every key of ``data_urls`` to the CSV fetched
        from ``base_url`` plus that entry's path.  Finally the "confirmed"
        dataframe is displayed in truncated form.
        """
        self.dataframes = {}
        countries = geonamescache.GeonamesCache().get_countries()
        self.country_metadata = {}

        # geonamescache spelling -> spelling used as key of country_metadata
        renamed = {
            "Timor Leste": "East Timor",
            "Vatican": "Vatican City",
            "Democratic Republic of the Congo": "Congo (Kinshasa)",
            "Republic of the Congo": "Congo (Brazzaville)",
            "Cabo Verde": "Cape Verde",
        }

        for metadata in countries.values():
            name = renamed.get(metadata["name"], metadata["name"])
            population = metadata["population"]
            area = metadata["areakm2"]
            continent = continent_codes[metadata["continentcode"]]
            self.country_metadata[name] = dict(
                population=population, area=area, continent=continent)

        for metric, path in data_urls.items():
            response = requests.get(base_url + path)
            # Parse the downloaded text as comma-separated values.
            self.dataframes[metric] = pd.read_csv(
                StringIO(response.text), sep=",")

        # Show a truncated view of the raw "confirmed" table.
        display(Markdown("### Raw confirmed cases data, per region/state"))
        with pd.option_context("display.max_rows", 10, "display.max_columns", 14):
            display(self.dataframes["confirmed"])
Exemple #29
0
 def set_event_location(self, ent, event):
     """Fill ``event.country``, ``event.city`` and ``event.state``.

     The LOCATION entities of *ent* are turned into ``(place, value)``
     pairs by ``get_frequency``.  When ``find_event_country`` names a
     country, it is stored and the first other place that
     ``is_city_in_country`` accepts becomes the city.  Otherwise the first
     place accepted by ``is_city_valid`` becomes the city.  Finally, when
     the event's country is 'United States' or empty and
     ``find_event_state`` names a state, the state is stored and the
     country is set to 'United States'.

     Args:
         ent: passed to ``self.get_entity`` together with 'LOCATION'.
         event: object whose ``city``, ``country`` and ``state``
             attributes are updated; ``country`` is read as well.
     """
     location_pairs = self.get_frequency(self.get_entity('LOCATION', ent))
     country = self.find_event_country(location_pairs)

     if country != '':
         # A country was found: look for a city inside it.
         for place, _ in location_pairs:
             if place != country and self.is_city_in_country(place, country):
                 event.city = place
                 break
         event.country = country
     else:
         # No country: settle for the first valid city.
         for place, _ in location_pairs:
             if self.is_city_valid(place):
                 event.city = place
                 break

     if event.country in ('United States', ''):
         state = self.find_event_state(location_pairs)
         if state != '':
             event.state = state
             event.country = 'United States'
Exemple #30
0
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    'United States': 'US'
}

# Inverse of state_name_abb_dic: abbreviation -> name.
abb_name_dic = {abb: name for name, abb in state_name_abb_dic.items()}

# TODO: find out whether geonamescache alone can provide all of these entries;
# it is imported anyway, so it should be used for this if it can be.
# FIPS code -> name: first the counties reported by geonamescache, then the
# entries of fips_abb_dic (except 'PR') translated through abb_name_dic.
all_fips_name_dic = {
    **{
        county['fips']: county['name']
        for county in geonamescache.GeonamesCache().get_us_counties()
    },
    **{
        fips: abb_name_dic[abb]
        for fips, abb in fips_abb_dic.items() if abb != 'PR'
    },
}
# Inverse of all_fips_name_dic: name -> FIPS code.
all_name_fips_dic = {name: fips for fips, name in all_fips_name_dic.items()}

###################

# Absolute path of the directory that contains this file.
ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]


def filenamer(path):
    """Return *path* joined onto ``ROOT_DIR`` with ``os.path.join``."""
    joined = os.path.join(ROOT_DIR, path)
    return joined