def get_location(title, main_text, url):
    """Extract location dicts from an article's title, body and URL.

    Runs geograpy over each of the three inputs and merges every city and
    country hit into one list via the shared helper functions.  Falls back
    to a default "United States / unknown" entry when nothing is found.

    :param title: article title
    :param main_text: article body text
    :param url: article URL; split on '/' and '-' so path words can be
        recognised as place names
    :return: list of location dicts
    """
    locations_list = []
    # Turn the URL into plain words so geograpy can parse it as text.
    # (r'[/-]' is equivalent to the old r'\/|\-' without the needless escapes.)
    url_words = " ".join(re.split(r'[/-]', url))

    # One geograpy context per text source, in the original precedence order.
    contexts = [geograpy.get_place_context(text=source)
                for source in (main_text, title, url_words)]

    # Merge all city hits first, then all country hits — this preserves the
    # original ordering of locations_list.
    for ctx in contexts:
        if ctx.cities:
            locations_list = get_location_objects_from_cities(
                ctx.cities, locations_list)
    for ctx in contexts:
        if ctx.countries:
            locations_list = get_location_objects_from_countries(
                ctx.countries, locations_list)

    if not locations_list:
        # Set up as default location - US, unknown city.
        locations_list.append({'country': 'United States',
                               'location': 'unknown'})

    return locations_list
Esempio n. 2
0
    def process_item(self, item, spider):
        '''
        DESCRIPTION:
        -----------
        For each news item, the list of countries mentioned in the news
        text is fetched using 'geograpy' and stored on the item.

        RETURNS:
        --------
        News item with 'countriesMentioned' field updated is returned.
        '''
        try:
            places = geograpy.get_place_context(url=item['newsUrl'])
            countryList = []
            for country in places.country_mentions:
                countryList.append(country[0].encode('ascii', 'ignore'))
            item['countriesMentioned'] = countryList
        except etree.XMLSyntaxError as e:
            # Bug fix: the original did 'XML Syntax Error' + e, which raises
            # TypeError (str + exception) inside the handler.  Let logging
            # format the exception lazily instead.
            logging.info('XML Syntax Error %s', e)
        except etree.DocumentInvalid as e:
            logging.info('XML Document Invalid Error %s', e)
        except Exception:
            # Any other failure drops the item so it is not stored half-filled.
            raise DropItem("Failed to extract country mentions from: " +
                           item['newsUrl'])

        return item
	def parseCountries (self, title):
		"""Return the countries geograpy finds in *title*, minus HOME_COUNTRY."""
		found = geograpy.get_place_context(text=title).countries
		return [name for name in found if name != HOME_COUNTRY]
Esempio n. 4
0
def locFromText(set_Country, textList, filterList):
    """
    Extract locations from tweets.
    :param set_Country: country filter (one location name can be associated
        with multiple countries)
    :param textList: list of (id, text) rows selected from the database
    :param filterList: names that should not be considered as locations
        under a certain event
    :return: filtered list of (id, address_string) tuples extracted from
        the tweet texts
    """
    loc = []
    print('Start extracting locations from texts')
    # Hoisted out of the loop: compile the symbol-stripping regex once
    # instead of re-parsing it for every tweet.
    symbol_re = re.compile(r'[^\w]')
    for t in textList:
        text = t[1]
        if not text:
            continue
        text = symbol_re.sub(' ', text)  # remove symbols/punctuation

        places = geograpy.get_place_context(text=text)
        for add in places.address_strings:
            parts = add.split(',')
            if len(parts) < 3:
                # Robustness fix: an address string without a country
                # component used to raise IndexError here; skip it instead.
                continue
            country = parts[2]  # country name from the address string
            if set_Country in country and not any(e in add
                                                  for e in filterList):
                loc.append((t[0], add))
    return loc
	def parseCities (self, title):
		"""Return the cities geograpy finds in *title*, excluding HOME_CITY."""
		found = geograpy.get_place_context(text=title).cities
		return [name for name in found if name != HOME_CITY]
Esempio n. 6
0
    def get_places(self):

        """Match records of the module-level CITIES dataset against geograpy output.

        Runs geograpy over ``self.text`` and, for every CITIES record whose
        city/state/country name shows up in the extraction, emits a location
        dict keyed by the matched name.

        NOTE(review): all three membership tests below compare against
        ``places.countries`` — even for the city and state names.  That looks
        like it may have been intended to be ``places.cities`` /
        ``places.regions``; confirm before relying on the results.
        """
        text_input = self.text
        location_dict={}
        places = geograpy.get_place_context(text=text_input)

        for city_dict in CITIES: 
            # City name matched: record the full city/state/country entry.
            if city_dict['city'] in places.countries: 
                location_dict[city_dict['city']] = {
                    'country': city_dict['country'],
                    'state': city_dict['state'],
                    'city': city_dict['city'], 
                    'name': city_dict['city'],
                }
            # State name matched: record a state-level entry (no city key).
            if city_dict['state'] in places.countries:
                location_dict[city_dict['state']] = {
                    'state': city_dict['state'],
                    'country': city_dict['country'],
                    'name': city_dict['state'],
                }
            # Country name matched: record a country-level entry.
            if city_dict['country'] in places.countries:
                location_dict[city_dict['country']] = {
                    'country': city_dict['country'],
                    'name': city_dict['country'],
                }
        # Dict keyed by matched name de-duplicates; only the values matter.
        return list(location_dict.values())
Esempio n. 7
0
    def getGeoLocation(self, newsUrl):
        '''
        DESCRIPTION:
        ------------
        This function finds the country and its geo location, specified
        in newsUrl.  Only the first (most-mentioned) country is geocoded.

        PARAMETERS:
        -----------
        newsUrl: URL corresponding to news.

        RETURNS:
        --------
        1. geoPoint: [longitude, latitude] of country mentioned in newsUrl
                     (empty list when nothing could be resolved).
        2. country : Country specified in newsUrl ("" when none found).
        '''
        places   = geograpy.get_place_context(url=newsUrl)
        geoPoint = []
        country  = ""
        try:
            # The original looped and broke after one iteration; take the
            # first mention explicitly instead.
            first = next(iter(places.country_mentions), None)
            if first is not None:
                country = first[0].encode('ascii', 'ignore')
                # NOTE(review): newer geopy releases require a user_agent
                # argument here — confirm the installed version.
                geolocator = Nominatim()
                location   = geolocator.geocode(country)
                geoPoint.append(location.longitude)
                geoPoint.append(location.latitude)
        except Exception:
            # Fix: was a bare 'except:' (also swallowed KeyboardInterrupt /
            # SystemExit).  On any geocoding failure fall back to defaults.
            geoPoint = []
            country  = ""
        return (geoPoint, country)
Esempio n. 8
0
def _first_city_or_country(places):
    """Return the first city if any, else the first country, else None."""
    if len(places.cities) > 0:
        return places.cities[0]
    if len(places.countries) > 0:
        return places.countries[0]
    return None


def get_place(title, desc, url):
    """Best-effort single place name for an article.

    Tries the combined title+description text first, then the URL.
    Cities are preferred over countries.  Returns 0 (the original
    sentinel) when nothing is found.
    """
    # Fix: 'desc != None' replaced with the idiomatic identity test.
    if desc is not None:
        text = title + desc
    else:
        text = title

    place = _first_city_or_country(get_place_context(text=text))
    if place is not None:
        return place

    place = _first_city_or_country(get_place_context(url=url))
    if place is not None:
        return place

    return 0
Esempio n. 9
0
def findLocations(corpus):

	# parsing corpus with geograpy
	places = geo.get_place_context(text=corpus)

	# extracting locations
	print places.countries
	print places.regions
	print places.cities
	print places.other
Esempio n. 10
0
def main():
    """Download hot/top /r/travel pictures into per-country folders.

    Usage: script.py [pic_count] [--top]

    The comment text of each post is mined with geograpy to guess which
    country the picture shows; posts with no recognizable country go
    into pics/unknown/.
    """
    # How many pictures to download (overridable via argv[1]).
    pic_count = 15
    if len(sys.argv) >= 2:
        pic_count = int(sys.argv[1])

    # Create the picture directory plus the fallback 'unknown' folder.
    picture_dir = os.path.join(os.getcwd(), 'pics')
    create_dir(picture_dir)
    unknown_location_dir = os.path.join(picture_dir, 'unknown')
    create_dir(unknown_location_dir)

    listing = travel_subreddit.hot(limit=pic_count)
    if len(sys.argv) >= 3 and sys.argv[2] == "--top":
        listing = travel_subreddit.top(limit=pic_count)

    # TODO: Preprocess list (remove all non image posts)
    for submission in listing:
        # TODO: handle imgur links
        # Only download jpg; guard clause keeps the body flat.
        if not submission.url.endswith('.jpg'):
            continue

        # Combine all top-level comments into one text to search for the
        # country.  Fix: ''.join avoids the quadratic += string build-up,
        # and the redundant list(...) around submission.comments is gone.
        search_str = "".join(
            comment.body for comment in submission.comments
            if hasattr(comment, 'body'))

        places = geograpy.get_place_context(text=search_str)
        if places.countries:
            # Pick the country with the highest number of mentions.
            country = max(places.country_mentions,
                          key=lambda item: item[1])[0]
            create_dir(os.path.join(picture_dir, country))
        else:
            country = "unknown"

        # Clean up the post title for use as a filename.
        words = nltk.word_tokenize(submission.title)
        underscored_title = ' '.join(words).replace(' ', '_')
        title = re.sub(r'\W+', '', underscored_title) + '.jpg'
        filepath = os.path.join(picture_dir, country, title)

        save_image(filepath, submission.url)
Esempio n. 11
0
def getPlaceET_fromText_NLTK(text):
    """Map every country/other place geograpy finds in *text* to ISO 3166-1 codes."""
    if not text:
        # Nothing to parse: empty, None-filtered result.
        return filter(None, [])

    context = geograpy.get_place_context(text=text)
    codes = [getISO3166_1code(name)
             for name in (context.countries + context.other)]
    return filter(None, flatten(codes))
Esempio n. 12
0
def getPlaceET_fromText_NLTK(text):
    """Resolve the places geograpy extracts from *text* into ISO 3166-1 codes."""
    if not text:
        # Empty input short-circuits before geograpy is invoked.
        return filter(None, list())

    ctx = geograpy.get_place_context(text=text)
    resolved = [getISO3166_1code(place) for place in ctx.countries + ctx.other]
    return filter(None, flatten(resolved))
Esempio n. 13
0
def getTweetLocation(tweet):
    '''
    Best-effort country detection for a tweet.

    Preference order:
      1. the tweet's own 'place' field, when declared;
      2. keyword matching on the user's free-text location (for countries
         geograpy tends to miss);
      3. geograpy parsing of that same text — the first country in the
         returned list is the most likely one.

    :param tweet: tweet dict
    :return: country name, or None when it cannot be determined
    '''
    # 1. A tweet-level place wins outright.
    # ('geo' / 'coordinates' were never present in the observed tweets.)
    if tweet['place']:
        return tweet['place']['country']

    # 2. Fall back to the user's self-declared location string.
    location_text = tweet['user']['location']
    if location_text == '':
        # No user location at all: give up.
        return None

    # Keyword shortcut for countries geograpy could not identify.
    keyword_hit = recognizeSpecificCountries(location_text)
    if keyword_hit:
        return keyword_hit

    # 3. Ask geograpy.  It returns all candidate countries; when the text
    # contains both a city and a country, the actual country comes first:
    #   'London'         -> ['United Kingdom', 'United States', 'Canada']
    #   'London, Canada' -> ['Canada', 'Spain', 'United Kingdom', ...]
    candidates = geograpy.get_place_context(text=location_text).countries
    if not candidates:
        return None
    return candidates[0]
Esempio n. 14
0
def map():
    """Render map.html with a lat/lon pair for every region found in the posted text."""
    posted_text = request.form['text']
    regions = geograpy.get_place_context(text=posted_text).regions
    geocoder = Nominatim(user_agent=app.config['GOOGLE_MAP_API'])
    lat_lon = []
    for region in regions:
        try:
            hit = geocoder.geocode(region)
        except GeocoderTimedOut:
            # Skip regions the geocoder times out on.
            continue
        if hit:
            lat_lon.append([hit.latitude, hit.longitude])
    return render_template('map.html', lat_lon=lat_lon)
Esempio n. 15
0
def findLocFromURL(urlList):
    """
    Extract location info directly from each URL.
    :param urlList: list of (id, url) pairs
    :return: list of (id, address_strings) pairs for URLs that yielded locations
    """
    print('start extract location from url')
    found = []
    for entry in urlList:
        print(entry[0])
        address_strings = geograpy.get_place_context(url=entry[1]).address_strings
        print(address_strings)
        if address_strings:
            found.append((entry[0], address_strings))
    return found
Esempio n. 16
0
    def testProceedingsExample(self):
        '''
        test a proceedings title Example
        '''
        examples = [
            '''Proceedings of the 
IEEE 14th International Conference on 
Semantic Computing, ICSC 2020, 
San Diego, CA, USA, 
February 3-5, 2020'''
        ]
        for example in examples:
            places = geograpy.get_place_context(text=example)
            print(places)
            city = geograpy.locateCity(example, debug=False)
            print(city)
Esempio n. 17
0
def name_reg(text):
    """Return a (countries, regions, cities) triplet of recognized-entity Counters."""
    tallies = (Counter(), Counter(), Counter())

    if text:
        ctx = geograpy.get_place_context(text=text)
        mention_lists = (ctx.country_mentions,
                         ctx.region_mentions,
                         ctx.city_mentions)
        # Fold each non-empty mention list into its matching counter.
        for tally, mentions in zip(tallies, mention_lists):
            if mentions:
                tally.update(unpack_fd(mentions))

    return tallies
Esempio n. 18
0
def query_crawled_index(request, core_name, indexed_path):
    '''
        To query crawled data that has been indexed into
        Solr or Elastichsearch and return location names
    '''
    if "solr" in indexed_path.lower():
        if IndexFile(core_name, indexed_path.lower()):
            location_names = []
            points = []
            query_range = 500
            try:
                url = "{0}/select?q=*%3A*&wt=json&rows=1".format(indexed_path)
                response = urllib2.urlopen(url)
                numFound = eval(response.read())['response']['numFound']
                for row in range(0, int(numFound), query_range):
                    query_url = "{0}/select?q=*%3A*&start={1}&rows={2}&wt=json".format(
                        indexed_path, row, row + query_range)
                    places = geograpy.get_place_context(url=query_url)
                    location_names.append(places.regions)
                    location_names.append(places.countries)
                    location_names.append(places.cities)
                    location_names.append(places.other)
                    location_names = flatten(location_names)
                print "Found {0} Locations for {1}".format(
                    len(location_names), indexed_path)
                print "Finding coordinates.."
                for location in location_names:
                    try:
                        geolocation = geolocator.geocode(location)
                        points.append({
                            'loc_name': "{0}".format(location),
                            'position': {
                                'x': geolocation.longitude,
                                'y': geolocation.latitude
                            }
                        })
                    except:
                        pass
                print "Found {0} coordinates..".format(len(points))
                status = IndexCrawledPoints(core_name, indexed_path.lower(),
                                            points)
                return HttpResponse(status=200, content=status)
            except Exception as e:
                return False
    else:
        pass
Esempio n. 19
0
def query_crawled_index(request, core_name, indexed_path):
    '''
        To query crawled data that has been indexed into
        Solr or Elastichsearch and return location names
    '''
    if "solr" in indexed_path.lower():
        if IndexFile(core_name, indexed_path.lower()):
            location_names = []
            points = []
            query_range = 500
            try:
                url = "{0}/select?q=*%3A*&wt=json&rows=1".format(indexed_path)
                response = urllib2.urlopen(url)
                numFound = eval(response.read())['response']['numFound']
                for row in range(0, int(numFound), query_range):
                    query_url = "{0}/select?q=*%3A*&start={1}&rows={2}&wt=json".format(indexed_path, row, row+query_range)
                    places = geograpy.get_place_context(url=query_url)
                    location_names.append(places.regions)
                    location_names.append(places.countries)
                    location_names.append(places.cities)
                    location_names.append(places.other)
                    location_names = flatten(location_names)
                print "Found {0} Locations for {1}".format(len(location_names), indexed_path)
                print "Finding coordinates.." 
                for location in location_names:
                    try:
                        geolocation = geolocator.geocode(location)
                        points.append(
                            {'loc_name': "{0}".format(location),
                            'position':{
                                'x': geolocation.longitude,
                                'y': geolocation.latitude
                                    }
                            }
                        )
                    except:
                        pass
                print "Found {0} coordinates..".format(len(points))
                status = IndexCrawledPoints(core_name, indexed_path.lower(), points)
                return HttpResponse(status=200, content=status)
            except Exception as e:
                return False
    else:
        pass
Esempio n. 20
0
def run(data):
    # Already geocoded: return the item untouched.
    if 'coords' in data['geo']:
        return data

    # GDELT items carry a source URL that geograpy can fetch directly.
    if 'fromURL' in data and data['source'] in ['gdelt']:
        kwargs = {
            'url': data['fromURL']
        }

    else:
        # Otherwise parse the item text, preferring the English translation.
        if 'contentEnglish' in data:
            field = 'contentEnglish'
        else:
            field = 'content'

        kwargs = {
            'text': data[field]
        }

    try:    
        pc = geograpy.get_place_context(**kwargs)
    except Exception, e:
        # Best-effort: on any geograpy failure return the item unchanged.
        # NOTE(review): this snippet appears truncated — the success path
        # (where 'pc' would be used) is not visible in this chunk.
        return data
Esempio n. 21
0
def extract_venue(title):
    """Comma-join the cities geograpy finds in *title*; None when there are none."""
    cities = geograpy.get_place_context(text=title).cities
    return ','.join(cities) if cities else None
Esempio n. 22
0
def read_url(
    (url, date)
):  ##accepting first parameter as tuple, because pool() does not accept multiple arguments for functions
    """Crawl one (url, date) pair and store its country/title data in MySQL.

    NOTE(review): the tuple-unpacking parameter is Python-2-only syntax
    (removed by PEP 3113); this function cannot run under Python 3 as-is.
    Rows are only inserted when geograpy could associate a country.
    """
    #connect to MySQL database on AWS
    db = MySQLDatabase('gdelt', user='******', passwd='***********')

    #model class: database table named crawler
    #field instance: creating columns
    class Crawler(peewee.Model):
        Country = peewee.CharField()
        Title = peewee.TextField()
        Websiteurl = peewee.TextField()
        Date = peewee.DateField()
        Keyword1 = peewee.CharField()
        Keyword2 = peewee.CharField()
        Keyword3 = peewee.CharField()

        class Meta:
            database = db

    db.connect()

    db.create_tables(
        [Crawler], True
    )  # runs SQL CREATE TABLE statement (only has to be run once).  peewee will first check if table has already been created

    ## identifying associated country: take geograpy's first country hit,
    ## falling back to the 'NA' sentinel on any failure (best-effort).
    mentioned_country = str('NA')
    try:
        places = geograpy.get_place_context(url=url)
        mentioned_country = places.countries[0].encode('utf-8')
    except:
        pass

    ##identifying title and associated keywords
    ##(the newspaper-based extraction below is currently disabled, so the
    ## 'NA' sentinels are what actually gets written for these columns)
    mentioned_title = str('NA')
    mentioned_keyword1 = str('NA')
    mentioned_keyword2 = str('NA')
    mentioned_keyword3 = str('NA')
    # try:
    #     article = Article(url)
    #     article.download()
    #     article.parse()
    # except Exception:
    #     pass
    # try:
    #     mentioned_title = article.title
    # except Exception:
    #     pass
    # try:
    #     article.nlp()
    #     keywords = article.keywords()
    # except Exception:
    #     pass
    # try:
    #     mentioned_keyword1 = keywords[0]
    # except Exception:
    #     pass
    # try:
    #     mentioned_keyword2 = keywords[1]
    # except Exception:
    #     pass
    # try:
    #     mentioned_keyword3 = keywords[2]
    # except Exception:
    #     pass

    ##inserting into SQL (only when a country was actually identified)
    if (mentioned_country != 'NA'):
        Crawler.create(Country=mentioned_country,
                       Title=mentioned_title,
                       Websiteurl=url,
                       Date=date,
                       Keyword1=mentioned_keyword1,
                       Keyword2=mentioned_keyword2,
                       Keyword3=mentioned_keyword3)

    db.close()

    return
Esempio n. 23
0
def getGeoGraphy(text):
    """Return the (countries, regions, cities) geograpy extracts from *text*."""
    ctx = geograpy.get_place_context(text=text)
    return ctx.countries, ctx.regions, ctx.cities
Esempio n. 24
0
 def run(self):
     """Populate self.places / self.people / self.organs from self.text via geograpy."""
     ctx = geograpy.get_place_context(text=self.text)
     for attr in ('places', 'people', 'organs'):
         setattr(self, attr, getattr(ctx, attr))
# Fetch the NLTK corpora/models that geograpy's entity extractor relies on
# (network download; a no-op once they are cached locally).
nltk.downloader.download('words')
nltk.downloader.download('treebank')
nltk.downloader.download('maxent_treebank_pos_tagger')
nltk.downloader.download('punkt')
#
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('maxent_ne_chunker')
# nltk.download('words')
# sentence = "I am from Kadawatha"
# places = GeoText(sentence)
# print (places.cities)

# Demo: run geograpy over a Sri Lankan news snippet and print the
# extracted place names.
text = "Kadawatha Opposition Leader Mahinda Rajapaksa says that the whole  public administration has collapsed due to the constitution council’s arbitrary actions. " \
       "The Opposition Leader said so in response to a query a journalised raised  after a meeting held in Malabe and Meegamuwa"
places = geograpy.get_place_context(text=text)
print(places.places)
# url = 'http://www.bbc.com/news/world-europe-26919928'
# places = geograpy.get_place_context(url=url)
# print(places.cities)

print("****************************************************")

#
# text6 = u"""Opposition Leader Mahinda Rajapaksa says that the whole public administration has collapsed due to the constitution council’s arbitrary actions.
# The Opposition Leader said so in response to a query a journalised raised  after a meeting held.."""
# e6 = Extractor(text=text6)
# e6.find_entities()
#print(e6.places)

# print("****************************************************")
Esempio n. 26
0
                                        division = 'Minimumweight'
                                    divisionquery = 'Update BoxerData set Division = {2}{0}{2} where BoxerId = {1}'.format(
                                        division, boxerId, singleQuote)
                                    updateCursor.execute(divisionquery)
                                    updateCursor.commit()
                                except:
                                    print('no division')
                                #write nationality

                                try:
                                    country = birthDate[
                                        1]  #.encode(encoding='utf_16',errors='strict')
                                    country = str(country)
                                    country = country.lstrip()
                                    print(country)
                                    places = geograpy.get_place_context(
                                        text=country)
                                    nationalityquery = 'Update BoxerData set Nationality = {2}{0}{2} where BoxerId = {1}'.format(
                                        places.countries[1], boxerId,
                                        singleQuote)
                                    updateCursor.execute(nationalityquery)
                                    updateCursor.commit()
                                except:
                                    print('no nationality')
                                #write gender
                                #write stance
                                try:
                                    stancequery = 'Update BoxerData set Stance = {2}{0}{2} where BoxerId = {1}'.format(
                                        stance, boxerId, singleQuote)
                                    updateCursor.execute(stancequery)
                                    updateCursor.commit()
                                except:
import csv
import geograpy

h_file = open('hurricanes.csv')
o_file = open('computed_areas_geograpy.csv', 'wb')

hurricanes = csv.reader(h_file)

# Create Writer Object
wr = csv.writer(o_file, dialect='excel')

for row in hurricanes:
  h_uri = str(row[0])
  h_abstract = str(row[1]).decode('utf-8', 'ignore')
  
  places = geograpy.get_place_context(text=h_abstract)
  
  countries = list(set(places.countries))
  regions = list(set(places.regions))
  
  #print('Row #' + str(hurricanes.line_num) + ' ' + str(places.countries) + ' ' + str(places.regions) + ' ' + str(places.cities) + ' ' + str(places.other))
#   print('Row #' + str(hurricanes.line_num) + ' ' + str(countries) + ' ' + str(regions))
  
  if len(countries) != 0 :
    print('Countries: ')
    for country in countries:
      print(country)
      wr.writerow([h_uri, country])
  
  if len(regions) != 0 :
    print('Regions: ')
    for post in feedParsed.entries:
        feedTitle.append(post.title)
        feedContent.append(post.summary)
        print("feed " + str(entityCount) + " : " + post.title)
        entityCount = entityCount + 1

# places = geograpy.get_place_context(text=feedList[1])
# placesInFeed.append(places.places)
# print("places - %s" % [str(x) for x in placesInFeed])

print("Processing....")
for content in feedContent:

    if content != "":
        place = geograpy.get_place_context(text=content)
        placesInFeed.append(place.places)

    else:
        placesInFeed.append("null")

k = 1
for place in placesInFeed:
    print("place " + str(k) + " - %s" % str(place))
    k = k + 1

print(
    "############################################################################################################"
)
print(
    "############################################################################################################"
project_long_description = brazil_df['project_long_description'].values.tolist()
project_all = zip(project_title, project_long_description)

#for cell_title in brazil_df['project_title'] and cell_long in brazil_df['project_long_description']:
#for cell in brazil_df['project_title'],brazil_df['project_long_description']:
for cell in project_all:
    #print cell_title
    #print cell_long
    #cell = cell_title + cell_long
    #print cell
    #cell = ", ".join(cell)
    #print cell

    try:
        if not pd.isnull(cell[0]):
            placesInCell1 = geograpy.get_place_context(text=cell[0]).countries
        else:
            placesInCell1 = []
        if not pd.isnull(cell[1]):
            placesInCell2 = geograpy.get_place_context(text=cell[1]).countries
        else: 
            placesInCell2 = []

        placesInCell = placesInCell1 + placesInCell2

        if placesInCell:

            if "United States" in placesInCell:
                if not pd.isnull(cell[0]):
                    if ((" US " not in cell[0]) and (" USA " not in cell[0]) and (" United States Of America " not in cell[0]) and ("United States" not in cell[0]) ):
                        if not pd.isnull(cell[1]):
Esempio n. 30
0
    tweet = re.sub(r'[' + string.punctuation + ']+', ' ',
                   tweet)  # Remove punctuations
    twtok = TweetTokenizer(strip_handles=True, reduce_len=True)
    tokens = twtok.tokenize(tweet)
    tokens = [
        i.lower() for i in tokens
        if i not in stopwords and len(i) > 2 and i in english_vocab
    ]
    return tokens


# Tokenize the first 2000 tweets and collect a geograpy context for each.
words = []
places = []
#for tw in tweets_text:
for tw in tweets_text[:1000]:
    words += process_tweets_texts(tw)
    places.append(geo.get_place_context(text=tw))

# NOTE(review): this second loop just continues the same work for the next
# 1000 tweets — the two loops are equivalent to one pass over [:2000].
for tw in tweets_text[1000:2000]:
    words += process_tweets_texts(tw)
    places.append(geo.get_place_context(text=tw))

# Keep only extracted city names that also appear in the known-cities
# collection (presumably a set/list defined earlier — confirm).
city = []
for p in range(len(places)):
    pl = places[p].cities
    for i in pl:
        if i in cities:
            city.append(i)

print(city)
Esempio n. 31
0
def extract_city(text):
    """Return the list of city names geograpy extracts from *text*."""
    return geograpy.get_place_context(text=text).cities
Esempio n. 32
0
# REST endpoint used to validate candidate names against a GeoNames service.
api_url = 'http://92.62.139.201:8080/api/geonames/countries'

print("Opening input file '%s'..." % input_file_path)

with open(input_file_path, 'r', encoding='utf-8') as file:
    text = file.read().replace('\n', ' ')

# Process the text in 50k-character chunks so geograpy is never handed the
# whole (potentially huge) document at once.
chunk_size = 50000
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
output = {}    # chunk-relative char offset -> GeoNames records for that match
results = []   # names already accepted, to de-duplicate across chunks

print("Searching for geo names in '%s' language..." % language_code)

for chunk in chunks:
    places = get_place_context(text=chunk).countries
    temp_results = {}

    for place in places:
        # Only capitalised, not-yet-seen names that literally occur in this
        # chunk are looked up against the GeoNames service.
        if place[0].isupper() and place not in results and place in chunk:
            geo_names = get(api_url, {
                'name': place,
                'isolanguage': language_code
            }).json()

            # Accept the name only if the service knows it.
            if len(geo_names) > 0:
                pos = chunk.find(place)
                output[pos] = geo_names
                temp_results[pos] = place
                results.append(place)
Esempio n. 33
0
def extract_country(affiliation):
    """Return the most-mentioned country in *affiliation*, or "" if none.

    geograpy's country_mentions list is ordered by mention count, so the
    first entry is the best guess; an empty list yields the empty string.
    """
    places = geograpy.get_place_context(text=affiliation)
    try:
        return places.country_mentions[0][0]
    except IndexError:
        # Fix: 'except IndexError, e' is Python-2-only syntax (and the
        # bound exception was never used).
        return ""
Esempio n. 34
0
def cityDic(places):
    """Geocode each place name into a dict holding its address and coordinates."""
    geolocator = Nominatim(user_agent="specify_your_app_name_here")
    # Rate-limit Nominatim to one request per second, per its usage policy.
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
    resolved = []
    for name in places:
        entry = {"text": name, "address": "", "latitude": "", "longtitude": ""}
        hit = geocode(name)
        if hit:
            entry["address"] = hit.address
            coords = tuple(hit.point)  # (latitude, longitude, altitude)
            entry["latitude"] = coords[0]
            # key spelling ("longtitude") kept as-is: downstream readers use it
            entry["longtitude"] = coords[1]
        resolved.append(entry)
    return resolved

if __name__ == '__main__':
    args = get_args()
    data = load_from_json(args.data)

    place_tags = []
    # TODO : Process only sentences with label 1
    for sentence in data["sentences"]:
        # Extract place names per sentence, then geocode just the cities.
        places = geograpy.get_place_context(text=sentence)
        place_dicts = cityDic(places.cities) # Only cities ???
        place_tags.append(place_dicts)

    # Attach the per-sentence geocoded tags and write the result back out.
    data["place_tags"] = place_tags
    write_to_json(data, data["id"], extension="json", out_dir=args.out_dir)
Esempio n. 35
0
def extract_location_from_text(text):
    """Return geograpy's country mention counts for *text*."""
    # see https://stackoverflow.com/questions/40517720/python-geograpy-unable-to-run-demo
    return geograpy.get_place_context(text=text).country_mentions
Esempio n. 36
0
import folium
from folium.plugins import Fullscreen

# World map with a fullscreen toggle, saved as a standalone HTML page.
m = folium.Map(location=[10, 0], zoom_start=2.1)
Fullscreen().add_to(m)

m.save(outfile='fullscreen.html')

import geograpy
# NOTE(review): the extracted places are never used below — the marker
# coordinates are hard-coded (Mt. Hood Meadows), so this geograpy call
# appears to be leftover demo code.
url = 'http://www.bbc.com/news/world-europe-26919928'
places = geograpy.get_place_context(url=url)

folium.Marker([45.3288, -121.6625], popup='Mt. Hood Meadows').add_to(m)
Esempio n. 37
0
import geograpy
from geograpy import extraction
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
   
# Extract regions from a (noisy, OCR-like) museum description, then geocode
# each region to a [lat, lon] pair, skipping geocoder timeouts.
places = geograpy.get_place_context(text="The National Air and Space Museum of the Smithsonian Institution, also called the Air and Space Museum, is a museum  ‘in Washington, D.C. It was established in 1946 as the National Air Museum and opened its main building on the National Mail near L’Enfant Plaza in 1976. In 2018, the museum saw approximately 6.2 million visitors, making it the fifth most visited  _ museum in the world, and the second most visited museum in the United States.!°] The museum contains the Apollo 11  _ Command Module Columbia, the Friendship 7 capsule which was flown by John Glenn, Charles Lindbergh's Spirit of St.  -ouis, the Bell X-1 which broke the sound barrier, the model of the starship Enterprise used in the science fiction television jow Star Trek: The Original Series, and the Wright brothers' Wright Flyer airplane near the entrance.")
places = places.regions
gelocator = Nominatim(user_agent='google_api')
lat_lon = []
for place in places:
    try:
        location = gelocator.geocode(place)
        if location:
            lat_lon.append([location.latitude, location.longitude])
    except GeocoderTimedOut:
        # Time-outs are simply skipped; no retry.
        continue

print(places)