Exemple #1
0
def show_num_bins(slider_value):
    """Return a status line with the number of tweets currently available.

    Args:
        slider_value: Accepted to match the caller's signature; not read here.

    Returns:
        str: A fixed message followed by the row count of the object
        returned by ``get_tweet_data()``. The "last 60 seconds" wording comes
        from the message text only -- presumably the time window is enforced
        inside ``get_tweet_data``; confirm there.
    """
    tweet_count = len(get_tweet_data())
    message = "Total number of tweets streamed during last 60 seconds: {}"
    return message.format(tweet_count)
Exemple #2
0
def update_graph_bar(interval):
    """Build a horizontal bar chart of the ten most frequent tokens.

    Args:
        interval: Callback trigger value; not read by this function.

    Returns:
        go.Figure: A single ``go.Bar`` trace (counts on a log-scaled x-axis,
        tokens on the y-axis) together with its layout.
    """
    tweets = get_tweet_data()

    # Token frequencies over the tweet text (assumed Counter-like, since
    # ``most_common`` is called on it below).
    token_counts = bag_of_words(tweets.text)

    # The number of bars is fixed at 10 here; the list is reversed so that the
    # entry ``most_common`` ranks first comes last in the plotted sequence.
    ranked = list(reversed(token_counts.most_common(10)))

    counts = []
    labels = []
    for token, count in ranked:
        counts.append(count)
        labels.append(token)

    bars = go.Bar(
        x=counts,
        y=labels,
        name='Word Counts',
        orientation='h',
        # The colour list is reversed as well, mirroring the reversed ranking.
        marker={'color': chart_colors[::-1]},
    )

    figure_layout = go.Layout(
        xaxis=dict(type='log', autorange=True, title='Number of Words'),
        height=300,
        plot_bgcolor=app_color["graph_bg"],
        paper_bgcolor=app_color["graph_bg"],
        font=dict(color=app_color["graph_font"]),
        autosize=True,
        margin=go.layout.Margin(l=100, r=25, b=75, t=25, pad=4),
    )

    return go.Figure(data=[bars], layout=figure_layout)
def update_graph_sentiment(interval):
    """Build the rolling sentiment scatter plot for the most common keywords.

    On every call this function:

    1. loads the tweets from ``tweets.db`` and picks the top
       ``num_tags_scatter`` keywords from ``bag_of_words``;
    2. preprocesses the text column and, for each tweet containing a keyword
       (case-insensitive substring match), records the tweet's compound
       ``sid.polarity_scores`` value under that keyword;
    3. appends ``[[mean, std], timestamp]`` per keyword to the module-level
       ``sentiment_dict`` and the timestamp to ``X_universal``, after pruning
       points that have fallen out of the ``X_universal`` window.

    Keywords that matched no preprocessed tweet in this call are skipped
    rather than stored as NaN (``np.mean``/``np.std`` of an empty list return
    NaN and emit a RuntimeWarning).

    Args:
        interval: Callback trigger value; not read by this function.

    Returns:
        go.Figure: One marker trace per keyword held in ``sentiment_dict``,
        with error bars of ``std / 30``.
    """
    tweets = get_tweet_data('tweets.db')

    # Rank keywords on the raw text, before the column is preprocessed below.
    keyword_counts = bag_of_words(tweets['text'])
    top_keywords = [
        keyword for keyword, _ in keyword_counts.most_common(num_tags_scatter)
    ]

    tweets['text'] = tweets.text.apply(preprocess_nltk)

    # Lower-case each keyword once instead of once per (row, keyword) pair.
    needles = [(keyword, keyword.lower()) for keyword in top_keywords]

    sentiments = {keyword: [] for keyword in top_keywords}
    for row in tweets['text']:
        haystack = row.lower()
        matched = [keyword for keyword, needle in needles if needle in haystack]
        if not matched:
            continue
        # Score the tweet once and share the value across every keyword it hits.
        compound = sid.polarity_scores(row)['compound']
        for keyword in matched:
            sentiments[keyword].append(compound)

    # [mean, std] per keyword; keywords without any match are left out so no
    # NaN point is pushed into the history.
    avg_sentiments = {
        keyword: [np.mean(scores), np.std(scores)]
        for keyword, scores in sentiments.items()
        if scores
    }

    # NOTE(review): datetime.now() is local time while the axis title below
    # says GMT -- confirm the server runs in UTC.
    timestamp = datetime.datetime.now().strftime('%D, %H:%M:%S')
    X_universal.append(timestamp)

    # Drop points at or before the oldest timestamp still in X_universal, and
    # forget keywords whose history was already empty on entry.
    # NOTE(review): timestamps are compared as formatted strings, which does
    # not sort chronologically across a year change; the sibling scatter
    # callback prunes with ``<`` rather than ``<=`` -- confirm which is meant.
    oldest = X_universal[0]
    stale_keywords = []
    for keyword, history in sentiment_dict.items():
        if not history:
            stale_keywords.append(keyword)
            continue
        while history and history[0][1] <= oldest:
            history.popleft()

    for keyword in stale_keywords:
        sentiment_dict.pop(keyword)

    for keyword, stats in avg_sentiments.items():
        if keyword not in sentiment_dict:
            sentiment_dict[keyword] = deque(maxlen=30)
        sentiment_dict[keyword].append([stats, timestamp])

    # zip() stops at the shorter input, so keywords beyond the number of
    # available colours are not drawn.
    palette = chart_colors[:len(sentiment_dict)]

    data = [
        go.Scatter(
            x=[stamp for _, stamp in history],
            y=[stats[0] for stats, _ in history],
            error_y={
                "type": "data",
                "array": [stats[1] / 30 for stats, _ in history],
                "thickness": 1.5,
                "width": 1,
                "color": "#000",
            },
            name=keyword,
            mode='markers',
            opacity=0.7,
            marker=dict(color=color),
        )
        for color, (keyword, history) in zip(palette, sentiment_dict.items())
    ]

    layout = go.Layout(
        xaxis={
            'automargin': False,
            'range': [min(X_universal), max(X_universal)],
            'title': 'Current Time (GMT)',
            'nticks': 2,
        },
        yaxis={
            'autorange': True,
            'title': 'Sentiment Score',
        },
        height=400,
        plot_bgcolor=app_color["graph_bg"],
        paper_bgcolor=app_color["graph_bg"],
        font={"color": app_color["graph_font"]},
        autosize=False,
        legend={'orientation': 'v'},
        margin=go.layout.Margin(l=75, r=25, b=70, t=25, pad=4),
    )

    return go.Figure(
        data=data,
        layout=layout,
    )
def update_graph_scatter(n):
    """Build the rolling keyword-frequency line/scatter plot.

    Each call counts keywords in the tweets loaded from ``tweets.db``, appends
    ``[count, timestamp]`` for the top ``num_tags_scatter`` keywords to the
    module-level ``scatter_dict``, records the timestamp in ``X_universal``,
    and prunes points older than the oldest timestamp kept there.

    Args:
        n: Callback trigger value; not read by this function.

    Returns:
        go.Figure: One ``lines+markers`` trace per keyword held in
        ``scatter_dict``, on a log-scaled y-axis.
    """
    tweets = get_tweet_data('tweets.db')

    # Keyword frequencies over the tweet text.
    keyword_counts = bag_of_words(tweets['text'])

    # Timestamp shared by every point added during this call.
    timestamp = datetime.datetime.now().strftime('%D, %H:%M:%S')
    X_universal.append(timestamp)

    # Trim points that precede the oldest timestamp still tracked; keywords
    # whose history was already empty on entry are removed afterwards.
    oldest = X_universal[0]
    stale_keywords = []
    for keyword, history in scatter_dict.items():
        if not history:
            stale_keywords.append(keyword)
            continue
        while history and history[0][1] < oldest:
            history.popleft()

    for keyword in stale_keywords:
        scatter_dict.pop(keyword)

    for keyword, count in keyword_counts.most_common(num_tags_scatter):
        if keyword not in scatter_dict:
            scatter_dict[keyword] = deque(maxlen=30)
        scatter_dict[keyword].append([count, timestamp])

    # One colour per keyword; zip() below stops at the shorter of the two.
    palette = chart_colors[:len(scatter_dict)]

    traces = []
    for color, (keyword, history) in zip(palette, scatter_dict.items()):
        traces.append(
            go.Scatter(
                x=[stamp for _, stamp in history],
                y=[count for count, _ in history],
                name=keyword,
                mode='lines+markers',
                opacity=0.5,
                marker={'size': 10, 'color': color},
                line={'width': 6, 'color': color},
            )
        )

    figure_layout = go.Layout(
        xaxis=dict(
            automargin=False,
            range=[min(X_universal), max(X_universal)],
            title='Current Time (GMT)',
            nticks=6,
        ),
        yaxis=dict(type='log', autorange=True, title='Number of Tweets'),
        height=700,
        plot_bgcolor=app_color["graph_bg"],
        paper_bgcolor=app_color["graph_bg"],
        font=dict(color=app_color["graph_font"]),
        autosize=False,
        legend=dict(
            orientation='h',
            xanchor='center',
            yanchor='top',
            x=0.5,
            y=1.025,
        ),
        margin=go.layout.Margin(l=75, r=25, b=45, t=25, pad=4),
    )

    return go.Figure(data=traces, layout=figure_layout)