Beispiel #1
0
def go(args):
    '''
    Call the right function(s) for the task(s) and print the result(s).

    Inputs:
        args: parsed command-line arguments.  Every option is read as an
          attribute holding a sequence whose first element is used:
          task and file always; entity_key, k, min_count and n
          depending on the task.

    Exits with status 1, after writing a message to stderr, when the
    task number is outside 1..7 or the entity key is not recognised.
    '''

    task = args.task[0]
    if task <= 0 or task > 7:
        print("The task number needs to be a value between 1 and 7 inclusive.",
              file=sys.stderr)
        sys.exit(1)

    if task in [1, 2, 3]:
        # Maps each supported entity key to the value key that is paired
        # with it when calling the entity functions below.
        ek2vk = {
            "hashtags": "text",
            "urls": "url",
            "user_mentions": "screen_name"
        }

        ek = args.entity_key[0]
        if ek not in ek2vk:
            # Report on stderr, like the task-number error above, so that
            # stdout only ever carries task results.
            print("Invalid entity key:", ek, file=sys.stderr)
            sys.exit(1)
        entity_type = (ek, ek2vk[ek])

    # Validation happens before the file is read so that bad arguments
    # fail without touching the data file.
    tweets = get_json_from_file(args.file[0])

    if task == 1:
        print(find_top_k_entities(tweets, entity_type, args.k[0]))
    elif task == 2:
        print(find_min_count_entities(tweets, entity_type, args.min_count[0]))
    elif task == 3:
        print(find_frequent_entities(tweets, entity_type, args.k[0]))
    elif task == 4:
        print(find_top_k_ngrams(tweets, args.n[0], args.k[0]))
    elif task == 5:
        print(find_min_count_ngrams(tweets, args.n[0], args.min_count[0]))
    elif task == 6:
        print(find_frequent_ngrams(tweets, args.n[0], args.k[0]))
    else:
        # Remaining valid task (7): results are grouped by month and
        # printed by the dedicated helper instead of print().
        result = find_top_k_ngrams_by_month(tweets, args.n[0], args.k[0])
        pretty_print_by_month(result)
Beispiel #2
0
# -*- coding: utf-8 -*-
"""Demo program.

Loads the tweet data files used in the PA #3 writeup and binds the
sample tweets that the writeup refers to.
"""

# DO NOT REMOVE THESE LINES OF CODE
# pylint: disable-msg=invalid-name

import util

# One JSON data file is loaded per account; each result is bound to a
# module-level name matching its file name.
# NOTE(review): UKLabour is indexed by integer position below, so each
# loaded value is presumably a list of tweets -- confirm against
# util.get_json_from_file.
Conservatives = util.get_json_from_file("data/Conservatives.json")
UKLabour = util.get_json_from_file("data/UKLabour.json")
theSNP = util.get_json_from_file("data/theSNP.json")
LibDems = util.get_json_from_file("data/LibDems.json")

# Sample tweet from the "Data" section of the writeup.
tweet0 = UKLabour[651]

# Sample tweet from the "Pre-processing step" and "Representing
# N-grams" sections of the writeup.
tweet1 = UKLabour[55]
Beispiel #3
0
from analyze import get_top_n_entities


def make_model(tweets, n, x_title, y_title):
    '''
    Build a bar chart of the n most frequent hashtags in the tweets.

    Inputs:
        tweets: a list of tweets
        n: integer, number of hashtags to include
        x_title: title for the x axis
        y_title: title for the y axis

    Returns: the bar chart object created by vis.Bar
    '''
    hashtag_entity = ("hashtags", "text")
    ranked = get_top_n_entities(tweets, hashtag_entity, n)

    # Transpose the ranked pairs into one sequence of hashtags and one
    # sequence of their counts.
    hashtags, counts = zip(*ranked)

    chart = vis.Bar({'data': counts, 'x': hashtags}, iter_idx='x')
    chart.axis_titles(x=x_title, y=y_title)
    return chart


# Edit this section to match whichever handles are being analyzed.
# For each handle: load its tweets, chart the top 10 hashtags, and
# write the chart out as JSON.
trump_tweets = get_json_from_file("data/trump.json")
trump_chart = make_model(trump_tweets, 10, "Top 10 Hashtags", "Frequency")
trump_chart.to_json('models/top_hashtags_trump.json')

obama_tweets = get_json_from_file("data/obama.json")
obama_chart = make_model(obama_tweets, 10, "Top 10 Hashtags", "Frequency")
obama_chart.to_json('models/top_hashtags_obama.json')