Esempio n. 1
0
def correct_positive():
    """correct number of positive tweets."""
    # Run the student's solution in the check50 sandbox and grab classify().
    classify = uva.check50.py.run("tweet.py").module.classify

    import helpers

    # Load the tweet corpus and both sentiment word lists.
    dates, tweets = helpers.read_tweets("trump.txt")
    positives, negatives = (
        helpers.read_words(fname)
        for fname in ("positive_words.txt", "negative_words.txt")
    )

    # Capture everything classify() prints.
    with uva.check50.py.capture_stdout() as stdout:
        classify(tweets, positives, negatives)
    out = stdout.getvalue()

    # The reference solution counts 538 positive tweets.
    check_classify(out, "positive", 538)
    return out
Esempio n. 2
0
def n_days():
    """correct number of bad days."""
    # Run the student's solution in the check50 sandbox and grab bad_days().
    bad_days = uva.check50.py.run("tweet.py").module.bad_days

    import helpers
    dates, tweets = helpers.read_tweets("trump.txt")
    positives = helpers.read_words("positive_words.txt")
    negatives = helpers.read_words("negative_words.txt")

    # Capture everything bad_days() prints.
    with uva.check50.py.capture_stdout() as stdout:
        bad_days(dates, tweets, positives, negatives)

    out = stdout.getvalue()

    # Raw string: "\d" in a plain literal raises a SyntaxWarning on
    # modern Python (and was relying on the backslash surviving by luck).
    # The pattern matches a date-like run of three numbers on one line.
    # NOTE(review): the classes keep the literal '^' from the original
    # ([^\n^\d]) so the set of accepted separators is unchanged.
    matches = re.findall(r"[\d]+[^\n^\d]*[\d]+[^\n^\d]*[\d]+", out)

    # len([]) == 0, so the separate "not matches" guard was redundant.
    if len(matches) != 31:
        raise check50.Failure(
            f"expected 31 bad days, but found {len(matches)} dates!")

    return out
Esempio n. 3
0
def best_words():
    """correct top five positive words."""
    # Run the student's solution in the check50 sandbox and grab positive_word().
    positive_word = uva.check50.py.run("tweet.py").module.positive_word

    import helpers
    dates, tweets = helpers.read_tweets("trump.txt")
    positives = helpers.read_words("positive_words.txt")

    # Capture everything positive_word() prints.
    with uva.check50.py.capture_stdout() as stdout:
        positive_word(tweets, positives)

    out = stdout.getvalue()
    top5 = ["great", "trump", "thank", "good", "honor"]

    # Every expected top-5 word must appear in the output.
    for word in top5:
        if word not in out:
            # f"{word}" was a pointless f-string wrapper around word itself.
            raise check50.Mismatch(word, out)

    # No other positive word may appear ("positive"/"top" are exempt since
    # they plausibly occur in a header line).
    for word in set(positives) - set(top5) - {"positive", "top"}:
        if word and word in out:
            raise check50.Failure(f"Did not expect {word} in top 5!")

    return out
Esempio n. 4
0
    # Fold one tweet's sentiment into a day's running total.
    # acc is [date, count]; each tweet contributes +1 (positive score),
    # 0 (neutral) or -1 (negative score) to the count.
    def determineDaySentiment(acc, sentiment):
        return [
            acc[0], acc[1] +
            (1 if sentiment[1] > 0 else 0 if sentiment[1] == 0 else -1)
        ]

    # Determine the net sentiment count for every day.
    # NOTE(review): groupedSentiments is defined above this excerpt —
    # presumably (date, sentiments-iterable) pairs from a groupby; confirm.
    daySentiments = [
        reduce(determineDaySentiment, list(day[1]), [day[0], 0])
        for day in groupedSentiments
    ]

    # Print the dates whose net sentiment is negative ("bad days").
    print("Trump's bad days:")
    [print("   ", day[0]) for day in daySentiments if day[1] < 0]


if __name__ == "__main__":
    # Load the tweet corpus: parallel sequences of dates and tweet texts.
    dates, tweets = helpers.read_tweets("trump.txt")

    # Load both sentiment word lists.
    positives, negatives = (
        helpers.read_words(fname)
        for fname in ("positive_words.txt", "negative_words.txt")
    )

    # Run the three analyses in order.
    classify(tweets, positives, negatives)
    positive_word(tweets, positives)
    bad_days(dates, tweets, positives, negatives)
Esempio n. 5
0
import collections
from scipy.sparse import csr_matrix
import numpy as np
import time

import helpers
import config

#np.random.seed(42)

# Read tags — presumably (list, name->index map, count); confirm in helpers.
tags, tag2idx, tag_count = helpers.read_tags()

# Read words — same presumed triple shape as the tags; confirm in helpers.
words, word2idx, word_count = helpers.read_words()

# One cluster per tag.
K = tag_count

# Random initial cluster centers: one word-space row per cluster,
# values uniform in [0, 1).
mu = np.random.rand(K, word_count)

# Read the indexed training posts in fixed-size chunks and materialize
# them all up front.
chunk_reader = helpers.ChunkReader(
    post_filename=config.paths.TRAIN_DATA_IDX,
    chunk_size=config.data.CHUNK_SIZE)  # TODO: Change
chunks = [chunk for chunk in chunk_reader]
#with open(config.paths.TRAIN_DATA_IDX, 'r') as f:
for iteration in range(0, config.algorithm.MAX_ITER):
    start = time.time()