예제 #1
0
def geolocate(tag_sentence, username):
    """
    Extract a candidate place name from a POS-tagged sentence and look up
    its coordinates through `geonames_api`.

    Inputs
    ------
    tag_sentence : sentence with POS tags as generated by the NLTK
    function `pos_tag()`. List of (word, tag) tuples.

    username : username for geonames.org. String.

    Returns
    -------

    lat : latitude coordinate, or 'NA' when no place name was found or
    the lookup raised IndexError

    lon : longitude coordinate, or 'NA' under the same conditions

    """
    #TODO: What about two word cities? Baton Rouge, New Orleans, etc.

    #Words that indicate a location
    markers = ('in', 'to', 'from')

    #First pass: adjacent word pairs where a marker word is directly
    #followed by a proper noun. The last such pair in the sentence wins.
    pair_hits = [second
                 for (first, _), (second, second_tag)
                 in nk.bigrams(tag_sentence)
                 if first in markers and second_tag == 'NNP']
    place = pair_hits[-1] if pair_hits else None

    #Second pass, only when the pairs produced nothing usable:
    #noun + preposition ('IN' tag) + proper noun. Again the last hit wins.
    if not place:
        for (_, head_tag), (_, mid_tag), (tail, tail_tag) \
                in nk.trigrams(tag_sentence):
            if (head_tag.startswith('N') and mid_tag == 'IN'
                    and tail_tag == 'NNP'):
                place = tail

    #Nothing that looks like a location: report 'NA' for both coordinates
    if not place:
        return 'NA', 'NA'

    #The stemmed word is what gets sent to geonames as the 'q' parameter
    stemmed = nk.stem.PorterStemmer().stem(place)
    query = geonames_api.make_params({'q': stemmed})
    try:
        latitude, longitude = geonames_api.get_lat_lon(query, username)
    except IndexError:
        #The lookup failed with IndexError; fall back to 'NA'
        return 'NA', 'NA'
    return latitude, longitude
예제 #2
0
def geolocate(trigrams, username):
    """
    Function to pull location information from a sentence. The location
    is then passed to the `geonames_api` to obtain latitude and
    longitudes for each event.

    Parameters
    ------
    trigrams: Iterable.
                Consecutive word trigrams of a POS-tagged sentence. Each
                item is a 3-tuple of (word, tag) pairs.

    username: String.
                Username for geonames.org.

    Returns
    -------

    lat: latitude coordinate as returned by
            `geonames_api.get_lat_lon`, or the string 'NA' when no
            location was found or the lookup raised IndexError.

    lon: longitude coordinate, or 'NA' under the same conditions.

    """
    #TODO: What about two word cities? Baton Rouge, New Orleans, etc.

    loc = None
    #Words that indicate a location
    keep = ('in', 'to', 'from')
    #A location is a proper noun (NNP) directly preceded by one of the
    #words in `keep`. Both adjacent pairs of every trigram are checked so
    #that the final word pair of the sentence is not skipped (it only ever
    #appears in the last two slots of the last trigram). The pairs are
    #tested in sentence order, so the last match in the sentence wins.
    for (w1, _), (w2, t2), (w3, t3) in trigrams:
        if w1 in keep and t2 == 'NNP':
            loc = w2
        if w2 in keep and t3 == 'NNP':
            loc = w3

    #If a location hasn't been found, return 'NA' for lat, lon
    if not loc:
        return 'NA', 'NA'

    #Create parameters to pass to the geonames_api
    #NOTE(review): stemming a proper noun may alter the place name before
    #it is sent to geonames - confirm this is intended.
    loc = nltk.stem.PorterStemmer().stem(loc)
    params = geonames_api.make_params({'q': loc})
    #Try to obtain coordinates from geonames
    try:
        lat, lon = geonames_api.get_lat_lon(params, username)
    #but if the lookup raised IndexError, return 'NA' for lat, lon
    except IndexError:
        return 'NA', 'NA'
    return lat, lon