Example #1
def draw_state_sentiments(state_sentiments):
    """Draw all U.S. states in colors corresponding to their sentiment value.

    Unknown state names are ignored; states without values are colored grey.

    Arguments:
    state_sentiments -- A dictionary from state strings to sentiment values
    """
    for name, shapes in us_states.items():
        draw_state(shapes, state_sentiments.get(name))
    for name, shapes in us_states.items():
        center = find_state_center(shapes)
        if center is not None:
            draw_name(name, center)
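For context, draw_state_sentiments is the last step of the pipeline that the remaining examples build up. A minimal sketch of that wiring, assuming load_tweets, make_tweet, group_tweets_by_state, average_sentiments, and wait behave as the docstrings in the examples below describe (the wrapper name draw_map_for_term is illustrative, not part of the project):

def draw_map_for_term(term='sandwich'):
    """Sketch: color each state by the average sentiment of tweets containing term."""
    tweets = load_tweets(make_tweet, term)           # tweets whose text contains term
    tweets_by_state = group_tweets_by_state(tweets)  # bucket tweets by nearest state center
    state_sentiments = average_sentiments(tweets_by_state)
    draw_state_sentiments(state_sentiments)
    wait()                                           # keep the drawing on screen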
Example #2
def group_tweets_by_state(tweets):  # place each tweet under its corresponding state's key in the dictionary
    """Return a dictionary that aggregates tweets by their nearest state center.


    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.


    tweets -- a sequence of tweet abstract data types


    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}  # starts out as an empty dictionary
    "*** YOUR CODE HERE ***"
    centros = {x: find_center(y) for x, y in us_states.items()}
    # the dictionary above stores the centroid of the group of polygons that make up
    # each state: x (the key) is the state name and y (the value) is its list of polygons

    for z in us_states:
        tweets_by_state[z] = []  # create a key (with an empty list) for every state

    for tw in tweets:  # now examine each tweet stored in 'tweets'
        # find_closest_state takes the tweet (its latitude & longitude) and returns the
        # name of the state whose center is nearest to it, e.g. 'CA'
        localizacao = find_closest_state(tw, centros)
        tweets_by_state[localizacao].append(tw)  # attach the tweet to its state's list
                
    return tweets_by_state
Example #3
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    state_centers = {n: find_center(s)
                     for n, s in us_states.items()
                     }  # dictionary of all state: state center
    states_list = [
        find_closest_state(tweet, state_centers) for tweet in tweets
    ]  # list of all states in tweets
    states = set(
        states_list
    )  # set of all state_names involved in tweets, with repeated state_names removed
    # list_of_tweet_from(state_name) returns a list of all tweets from a given state_name
    list_of_tweet_from = lambda state_name: [
        tweet for tweet in tweets
        if find_closest_state(tweet, state_centers) == state_name
    ]
    tweets_by_state = {state: list_of_tweet_from(state) for state in states}
    return tweets_by_state
Example #4
def most_talkative_state(term):
    """Return the state that has the largest number of tweets containing term.

    >>> most_talkative_state('texas')
    'TX'
    >>> most_talkative_state('sandwich')
    'NJ'
    """
    tweets = load_tweets(make_tweet, term)  # A list of tweets containing term
    estados = {}  # count of matching tweets per state
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    for tw in tweets:
        estado = find_closest_state(tw, us_centers)
        if estado not in estados:
            estados[estado] = 0
        palavras = extract_words(tw['text'])
        for p in palavras:
            if term == p:  # count each occurrence of term in the tweet's text
                estados[estado] += 1

    # find the state with the largest count
    maior = ""
    for x in estados:  # take any state as the initial candidate
        maior = x
        break
    for tw in estados:  # keep whichever state has the larger count
        if estados[tw] > estados[maior]:
            maior = tw
    return maior
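The two loops above implement a manual arg-max over the count dictionary. Assuming the same `estados` dictionary, the built-in max with a key function expresses the same search (it raises ValueError on an empty dictionary, whereas the loops above fall back to the empty string):

    # equivalent arg-max over the per-state counts built above
    maior = max(estados, key=estados.get)
    return maior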
Example #5
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    # For every tweet in the sequence
    for tweet in tweets:
        state = find_closest_state(tweet, us_centers)
        # If the state is not yet in the dictionary
        if state not in tweets_by_state:
            # Create a key for it
            tweets_by_state[state] = []
            # Add the tweet to its key
            tweets_by_state[state].append(tweet)
        else:  # If the state is already in the dictionary
            # Add the tweet to its key
            tweets_by_state[state].append(tweet)

    return tweets_by_state  #Returns tweets according to their states (dictionary)
Example #6
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    "*** YOUR CODE HERE ***"
    from collections import defaultdict
    tweets_by_state = defaultdict(lambda: None)
    us_centers = {n: find_state_center(s) for n, s in us_states.items()}
    for tweet in tweets:
        dist_from_center = lambda name: geo_distance(tweet_location(tweet),
                                                     us_centers[name])
        state = sorted(us_states.keys(), key=dist_from_center)[0]
        if tweets_by_state[state] is None:
            tweets_by_state[state] = [tweet]
        else:
            tweets_by_state[state].append(tweet)
    return tweets_by_state
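Sorting all the state names just to take the first one works, but min with the same key function finds the nearest state without the full sort; a drop-in replacement for the line that picks state, under the same dist_from_center lambda:

        state = min(us_states.keys(), key=dist_from_center)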
Example #7
File: trends.py  Project: Jwsa/p1
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122) 
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    # builds a dictionary that maps each state name n to the center of its shapes s,
    # where n and s come from us_states.items()

    for state in us_states:
        tweets_by_state[state] = []  # create an empty list for every state

    for position in tweets:
        closest_state = find_closest_state(position, us_centers)
        tweets_by_state[closest_state].append(position)

    return tweets_by_state
Example #8
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    for s in tweets:
        if find_closest_state(s, us_centers) in tweets_by_state:
            tweets_by_state[find_closest_state(s, us_centers)].append(s)
        else:
            tweets_by_state[find_closest_state(s, us_centers)] = [s]

    #tweets_by_state = {find_closest_state(s, us_centers): s for s in tweets}
    #tweets_by_state = {find_closest_state(s, us_centers): list(s['text'])}
    return tweets_by_state
Example #9
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    USA = {}
    for n, s in us_states.items():
        USA[n] = find_state_center(s)#USA becomes a dictionary of all states and their respective centers
    for tweet in tweets:
        stateName = findClosestState(tweet, USA)#we need to determine which state the tweet is from.... and I'm lazy and I hate nasty code so I'm making another method
        tweets_by_state.setdefault(stateName,[]).append(tweet)#if there isn't a list already there, make one... if there is then append the tweet
    return tweets_by_state
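The setdefault call above appends to a list that is created on first use; collections.defaultdict from the standard library does the same bookkeeping automatically. A minimal sketch of the same grouping, assuming the find_state_center and find_closest_state helpers used in the other examples:

from collections import defaultdict

def group_tweets_by_state(tweets):
    us_centers = {n: find_state_center(s) for n, s in us_states.items()}
    tweets_by_state = defaultdict(list)  # missing keys start out as empty lists
    for tweet in tweets:
        tweets_by_state[find_closest_state(tweet, us_centers)].append(tweet)
    return dict(tweets_by_state)  # hand back a plain dict, as in the examples above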
Example #10
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}

    us_centers = {n: find_center(s) for n, s in us_states.items()}

    for t in tweets:
        a = find_closest_state(t, us_centers)
        if a in tweets_by_state:
            tweets_by_state[a].append(t)
        else:
            tweets_by_state[a] = [t]
    return tweets_by_state
Example #11
def most_talkative_state(term):
    """Return the state that has the largest number of tweets containing term.

    >>> most_talkative_state('texas')
    'TX'
    >>> most_talkative_state('sandwich')
    'NJ'
    """
    tweets = load_tweets(make_tweet, term)  # A list of tweets containing term
    "*** YOUR CODE HERE ***"
    aux = 0
    aux1 = 0
    resultado = 0
    dicio = {}
    sc = {n: find_center(s) for n, s in us_states.items()}  # state centers
    for x in sc:
        dicio[x] = []  # one empty bucket per state
    for x in tweets:
        aux = find_closest_state(x, sc)
        dicio[aux].append(1)  # tally one entry for the tweet's closest state
    for x in dicio:
        aux = len(dicio[x])
        if aux > aux1:  # keep the state with the largest tally
            resultado = x
            aux1 = aux
    return resultado
Example #12
def average_sentiments(tweets_by_state):
    """Calculate the average sentiment of the states by averaging over all
    the tweets from each state. Return the result as a dictionary from state
    names to average sentiment values (numbers).

    If a state has no tweets with sentiment values, leave it out of the
    dictionary entirely.  Do NOT include states with no tweets, or with tweets
    that have no sentiment, as 0.  0 represents neutral sentiment, not unknown
    sentiment.

    tweets_by_state -- A dictionary from state names to lists of tweets
    """
    averaged_state_sentiments = {}
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}

    for state in tweets_by_state:  # For every state with tweets
        states = []
        total = 0
        for tweet in tweets_by_state[state]:  # Search through every tweet in that state
            if has_sentiment(analyze_tweet_sentiment(tweet)):  # If the tweet has a sentiment value
                states.append(state)  # Append the state so the count can be used for averaging
                total += sentiment_value(analyze_tweet_sentiment(tweet))  # Add the sentiment value to the total
                averaged_state_sentiments[state] = total / len(states)  # Running average for the state

    # Return the averaged state sentiments
    return averaged_state_sentiments
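The running average above is recomputed on every sentiment-bearing tweet; collecting the sentiment values first and averaging once at the end gives the same result. A sketch of the per-state body, assuming the same analyze_tweet_sentiment, has_sentiment, and sentiment_value accessors:

    for state in tweets_by_state:
        values = [sentiment_value(s)
                  for s in map(analyze_tweet_sentiment, tweets_by_state[state])
                  if has_sentiment(s)]
        if values:  # states with no sentiment-bearing tweets are left out entirely
            averaged_state_sentiments[state] = sum(values) / len(values)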
Example #13
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    
    tweets -- a sequence of tweet abstract data types    

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    tweets_num = len(tweets)

    i = 0
    while i < tweets_num:
        tweets_by_state[find_closest_state(tweets[i], us_centers)] = []
        i += 1
    i = 0
    while i < tweets_num:
        tweets_by_state[find_closest_state(tweets[i],
                                           us_centers)].append(tweets[i])
        i += 1
    return tweets_by_state
Example #14
def most_talkative_state(term):
    """Return the state that has the largest number of tweets containing term.

    If multiple states tie for the most talkative, return any of them.

    >>> most_talkative_state('texas')
    'TX'
    >>> most_talkative_state('soup')
    'CA'
    """
    tweets = load_tweets(make_tweet, term)  # A list of tweets containing term
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    state = []

    #For every tweet in the loaded tweets
    for tweet in tweets:
        #Search through words of text
        for word in tweet_words(tweet):
            if (word == term):  #If term is found
                #Append corresponding state to list
                state.append(find_closest_state(tweet, us_centers))

    #Find the state with the most occurances of the term and return it
    return max(set(state), key=state.count)
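max(set(state), key=state.count) rescans the list once per distinct state; collections.Counter tallies everything in a single pass. An equivalent final step, assuming the same `state` list built above:

    from collections import Counter
    return Counter(state).most_common(1)[0][0]  # state name with the highest count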
Example #15
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    for n in range(0, len(tweets)):
        location_of_tweet = find_closest_state(tweets[n], us_centers)

        if location_of_tweet in tweets_by_state:
            tweets_by_state[location_of_tweet] += [tweets[n]]
        else:
            tweets_by_state[location_of_tweet] = [tweets[n]]
    return tweets_by_state
Example #16
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """

    tweets_by_state = {}
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    for x in tweets:
        estado = find_closest_state(x, us_centers)
        if estado not in tweets_by_state:
            tweets_by_state[estado] = []
            tweets_by_state[estado] += [x]
        else:
            tweets_by_state[estado] += [x]

    return tweets_by_state
Example #17
def closest_state(position):
    states_center = {n: find_state_center(s) for n, s in us_states.items()}
    distance = 3963.2  # start at the earth's radius in miles; any closer state center wins
    closest = ''
    for state, center in states_center.items():
        if geo_distance(position, center) < distance:
            distance = geo_distance(position, center)
            closest = state
    return closest.upper()
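The same search can be written with min and a key function. A one-line equivalent, assuming the same states_center dictionary (it differs only when no center falls within the 3963.2-mile starting threshold, in which case the loop above returns ''):

    closest = min(states_center, key=lambda name: geo_distance(position, states_center[name]))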
Example #18
def draw_centered_map(center_state='TX', n=10):
    """Draw the n states closest to center_state."""
    us_centers = {n: find_state_center(s) for n, s in us_states.items()}
    center = us_centers[center_state.upper()]
    dist_from_center = lambda name: geo_distance(center, us_centers[name])
    for name in sorted(us_states.keys(), key=dist_from_center)[:int(n)]:
        draw_state(us_states[name])
        draw_name(name, us_centers[name])
    draw_dot(center, 1, 10)  # Mark the center state with a red dot
    wait()
Example #19
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
   
    # centers of all states, in the same order as us_states
    list_center_states = []
    for state, polygons in us_states.items():
        list_center_states += [find_state_center(polygons)]

    def state_of_tweet(t):
        # index (counter) of the state center closest to tweet t
        i, counter = 0, 0
        distance_low = geo_distance(tweet_location(t), list_center_states[0])
        for each in list_center_states:
            i += 1
            distance = geo_distance(tweet_location(t), each)
            if distance < distance_low:
                distance_low = distance
                counter = i - 1

        def count(x):
            # return the key of x at position `counter` (dict keys keep insertion order)
            a = 0
            for i in x:
                a += 1
                if a - 1 == counter:
                    state = i
            return state

        return count(us_states)

    for i in tweets:
        if state_of_tweet(i) in tweets_by_state:
            tweets_by_state[state_of_tweet(i)] += [i]
        else:
            tweets_by_state[state_of_tweet(i)] = [i]

    return tweets_by_state
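The nested count helper recovers a state name from its index by walking the dictionary; because the centers were collected in the same order as us_states, pairing names with centers directly removes the index bookkeeping. A sketch of an equivalent state_of_tweet, assuming the same list_center_states and helpers:

    state_names = list(us_states.keys())

    def state_of_tweet(t):
        # pick the state whose center is nearest to the tweet's location
        distances = [geo_distance(tweet_location(t), c) for c in list_center_states]
        return state_names[distances.index(min(distances))]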
Example #20
def draw_centered_map(center_state='TX', n=10):
    """Draw the n states closest to center_state.
    
    For example, to draw the 20 states closest to California (including California):

    # python3 trends.py CA 20
    """
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    center = us_centers[center_state.upper()]
    dist_from_center = lambda name: geo_distance(center, us_centers[name])
    for name in sorted(us_states.keys(), key=dist_from_center)[:int(n)]:
        draw_state(us_states[name])
        draw_name(name, us_centers[name])
    draw_dot(center, 1, 10)  # Mark the center state with a red dot
    wait()
Example #21
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    """
    
    tweets_by_state = {key: [] for key in us_states.keys()}
    us_centers = {n: find_center(s) for n, s in us_states.items()}  # compute the state centers once
    for t in tweets:
        state = find_closest_state(t, us_centers)
        tweets_by_state[state].append(t)
    return tweets_by_state
Example #22
def closest_state(tweets):
    """Return the name of the state whose center is closest to the tweet's location."""
    state_center = {n: find_state_center(s) for n, s in us_states.items()}
    position = tweet_location(tweets)
    state_distances = {}
    state_name = ""

    for element in state_center:
        distance = geo_distance(position, state_center[element])
        state_distances[element] = distance

    distance = float('inf')  # reset before searching for the minimum distance
    for element in state_distances:
        if distance > state_distances[element]:
            distance = state_distances[element]
            state_name = element

    return state_name
Example #23
def most_talkative_state(term):
    """Return the state that has the largest number of tweets containing term.

    >>> most_talkative_state('texas')
    'TX'
    >>> most_talkative_state('sandwich')
    'NJ'
    """
    tweets = load_tweets(make_tweet, term)  # A list of tweets containing term
    state_centers = {n: find_center(s) for n, s in us_states.items()}
    tgbs = group_tweets_by_state(tweets)  # Stands for "Tweets, Grouped By State" for shortening purposes :P
    most_tweets = 0
    swmt = ''  # "State with Most Tweets"
    for states in tgbs:
        if len(tgbs[states]) > most_tweets:
            most_tweets, swmt = len(tgbs[states]), states
    return swmt
Example #24
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    
    tweets -- a sequence of tweet abstract data types    

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    us_centers = {n: find_center(s) for n, s in us_states.items()}
    tweets_by_state = {}
    for tweet in tweets:
        key = find_closest_state(tweet, us_centers)
        tweets_by_state.setdefault(key, []).append(tweet)
    return tweets_by_state
Example #25
def count_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their state of origin.

    The keys of the returned dictionary are state names, and the values are
    normalized per capita tweet frequencies. You may use the dictionary
    us_state_pop, which associates state abbreviation keys with 2013 estimated
    population for the given state.

    tweets -- a sequence of tweet abstract data types
    """

    newdict = {}
    state = list(us_states.keys())
    p = 0
    while p < len(state):  # start every state at a count of zero
        newdict[state[p]] = 0
        p = p + 1
    elm = 0
    while elm < len(tweets):  # count the tweets whose location falls inside each state
        x = tweet_location(tweets[elm])
        for i, j in us_states.items():
            if is_in_state(x, us_states[i]):
                newdict[i] += 1
        elm += 1

    for k, v in newdict.items():  # convert counts to per-capita frequencies
        newdict[k] = newdict[k] / us_state_pop[k]

    x = newdict['IL']  # find the largest per-capita frequency
    for k, v in newdict.items():
        y = max(newdict[k], x)
        if y > x:
            x = y

    for k, v in newdict.items():  # normalize so the largest frequency becomes 1
        if x == 0:
            return newdict
        else:
            newdict[k] = newdict[k] / x

    return newdict
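The two trailing loops find the peak per-capita frequency and then scale every entry by it; max over the dictionary's values does the same in a couple of lines (a sketch over the same newdict):

    peak = max(newdict.values())  # largest per-capita frequency
    if peak == 0:
        return newdict  # nothing to normalize
    return {k: v / peak for k, v in newdict.items()}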
Example #26
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    center_state_dict = {}
    centers = []

    for state, polygons in us_states.items():
        center = find_state_center(polygons)
        centers.append(center)
        center_state_dict[center] = state

    state_centers = KdTree(centers)
    tweets_by_state = {}

    for tweet in tweets:
        location = tweet_location(tweet)
        nearest_state_center = state_centers.nearest_neigbour(
            location).location
        nearest_state = center_state_dict[nearest_state_center]

        if nearest_state not in tweets_by_state:
            tweets_by_state[nearest_state] = []
        tweets_by_state[nearest_state].append(tweet)

    return tweets_by_state
Example #27
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    posicao_central_estados = {n: find_center(s) for n, s in us_states.items()}
    for i in tweets:
        chave_estado = find_closest_state(i, posicao_central_estados)
        # setdefault creates the empty list on first use; the tweet is appended either way
        tweets_by_state.setdefault(chave_estado, []).append(i)
    return tweets_by_state
Example #28
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'


    #Note on tweets_by_state['MO']: if you put this doctest into your trends.py file, it should work correctly. As posted on Piazza the expected output shows double backslashes (\\); those are just string escaping and do not appear in the actual Python output, so don't worry about them.
    >>> tweets = load_tweets(make_tweet, 'obama')
    >>> tweets_by_state = group_tweets_by_state(tweets)
    >>> tweets_by_state['MO']
    [{'latitude': 37.17454213, 'text': 'obama: "we will not forget you." // the nation hasn\\'t, but obama must have \\'slept since then.\\'  or gone golfing. #joplin #fema', 'longitude': -95.10078354, 'time': datetime.datetime(2011, 8, 28, 18, 47, 46)}, {'latitude': 37.17454213, 'text': 'obama: "we will not forget you." // the nation hasn\\'t, but obama must have \\'slept since then.\\'  or gone golfing. #joplin #fema', 'longitude': -95.10078354, 'time': datetime.datetime(2011, 8, 28, 18, 47, 46)}, {'latitude': 39.05533081, 'text': 'obama uncle got arrested ! lmao', 'longitude': -94.57681917, 'time': datetime.datetime(2011, 8, 30, 11, 16, 13)}, {'latitude': 38.924711, 'text': 'cnn ipad notification pops up "president obama requests joint" that\\'s all i read. ended with "session of congress" it was cool for a second.', 'longitude': -94.500465, 'time': datetime.datetime(2011, 8, 31, 16, 26, 41)}, {'latitude': 38.44219, 'text': 'rt @ancientproverbs: insanity is doing the same thing in the same way & expecting a different outcome. -chinese proverbslisten up obama!', 'longitude': -90.3041, 'time': datetime.datetime(2011, 8, 29, 12, 57, 57)}, {'latitude': 39.305996, 'text': 'dream ?: better president...dubya, or obama? #gopdebate', 'longitude': -94.47124039, 'time': datetime.datetime(2011, 9, 8, 16, 40, 16)}, {'latitude': 36.84158628, 'text': 'ok, obama say sumthing smart, impress me.', 'longitude': -93.63118948, 'time': datetime.datetime(2011, 9, 8, 23, 8, 20)}, {'latitude': 39.03867236, 'text': "'all jews, pack up your things and head to your nearest train station.' - barack obama", 'longitude': -94.58384525, 'time': datetime.datetime(2011, 9, 8, 23, 20, 7)}, {'latitude': 38.78630813, 'text': 'great message by obama. now, back it up! #standmotv8d', 'longitude': -90.67543365, 'time': datetime.datetime(2011, 9, 8, 23, 42, 32)}, {'latitude': 38.57640134, 'text': 'pass this jobs bill .... pass this jobs bill.... in my obama vc*', 'longitude': -90.40396075, 'time': datetime.datetime(2011, 9, 8, 23, 44, 41)}, {'latitude': 38.9098151, 'text': "good thing that's over.. don't get in the way of football, obama. that would not make you popular to americans..", 'longitude': -94.6897306, 'time': datetime.datetime(2011, 9, 8, 23, 56, 4)}, {'latitude': 37.17959, 'text': "all i hear when obama talks is blah blah blah. he's the poster child for a presidential epic fail #2012hurrypleasehurry", 'longitude': -89.65636, 'time': datetime.datetime(2011, 9, 9, 12, 6, 44)}, {'latitude': 38.66376835, 'text': '@wsj i know obama is studying this!!!', 'longitude': -92.11143839, 'time': datetime.datetime(2011, 9, 2, 20, 24, 50)}]
    >>> len(tweets_by_state['MO'])
    13
    """
    tweets_by_state = {}

    "*** YOUR CODE HERE ***"
    us_centers = {n: find_center(s) for n, s in us_states.items()}  # Map each state name to its center
    for tweet in tweets:  # For each tweet, find the closest state and group by it
        location = find_closest_state(tweet, us_centers)
        if location in tweets_by_state:
            tweets_by_state[location] += [tweet]
        else:
            tweets_by_state[location] = [tweet]

    return tweets_by_state
Example #29
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    "*** YOUR CODE HERE ***"
    
    # dictionary of all state centroids to compare against (find_state_center returns position)
    state_centers = {key: find_state_center(value) for key,value in us_states.items()}

    # for each tweet, find which state centroid it is closest to (use geo_distance from geo.py)
    for tweet in tweets:
        candidate = None, None  # (state, distance) of the closest center seen so far
        for state, center in state_centers.items():
            distance = geo_distance(tweet_location(tweet), center)
            if candidate[0] is None or distance < candidate[1]:
                candidate = state, distance
        if candidate[0] in tweets_by_state:
            tweets_by_state[candidate[0]].append(tweet)
        else:
            tweets_by_state[candidate[0]] = [tweet]

    return tweets_by_state
Example #30
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    us_centers = {ID: find_center(coords) for ID, coords in us_states.items()}
    for tweet in tweets:
        state = find_closest_state(tweet, us_centers)
        if not tweets_by_state.get(state, 0):
            tweets_by_state[state] = [tweet]
        else:
            tweets_by_state[state].append(tweet)
    return tweets_by_state