Example no. 1
0
def writeToksToFile():
    """Tokenize every tweet for each topic and serialize the result.

    For each topic in TOPICS, builds a ``Tweets`` protobuf message holding
    each tweet's raw text plus its token indices (``-1`` for tokens not in
    the vocabulary) and writes it to ``<topic>.tweets``.
    """
    tokens, tweets_on_topic, tweets = readToks()

    # Map each token to the index of its FIRST occurrence, matching
    # list.index semantics, but with O(1) lookups instead of O(n) scans.
    token_index = {}
    for i, tok in enumerate(tokens):
        token_index.setdefault(tok, i)

    for topic in TOPICS:

        tokenized_tweets = Tweets()

        for tweet_idx in tweets_on_topic[topic]:

            tweet = tweets[tweet_idx]

            tokenized = tokenized_tweets.tweets.add()
            tokenized.tweet = tweet['text']
            for token in tokenize(tweet['text']):
                # -1 marks an out-of-vocabulary token.
                tokenized.tokens.append(token_index.get(token, -1))

            print(tokenized.tokens)

        # Write once per topic; the original rewrote the file on every
        # tweet and leaked the handle if the write raised.
        with open(topic + '.tweets', "wb") as f:
            f.write(tokenized_tweets.SerializeToString())
# Group tweet indices by topic: a tweet belongs to a topic when its
# lowercased text contains at least one of that topic's keywords.
tweets_on_topic = defaultdict(list)
for topic in topics:
    topic_keywords = keywords[topic]
    for tweet_idx, tweet in enumerate(tweets):
        text = tweet['text'].lower()
        if any(kw in text for kw in topic_keywords):
            tweets_on_topic[topic].append(tweet_idx)


# Serialize each topic's tweets (raw text + token indices, -1 for tokens
# missing from the vocabulary) into a protobuf file named '<topic>.tweets'.
for topic in topics:

    tokenized_tweets = Tweets()

    for tweet_idx in tweets_on_topic[topic]:

        tweet = tweets[tweet_idx]

        tokenized = tokenized_tweets.tweets.add()
        tokenized.tweet = tweet['text']
        for token in tokenize(tweet['text']):
            try:
                # NOTE: was appended to a variable named `index`, shadowing
                # the outer loop variable; renamed loop var avoids the clash.
                tokenized.tokens.append(tokens.index(token))
            except ValueError:
                tokenized.tokens.append(-1)

    # Write once per topic; the original re-opened and rewrote the file
    # inside the per-tweet loop and never used a context manager.
    with open(topic + '.tweets', "wb") as f:
        f.write(tokenized_tweets.SerializeToString())