예제 #1
0
from bix.twitter.base.utils import load_model_mat
from bix.twitter.fetch.download_tweets import download_tweets_twint
from bix.twitter.fetch.fetch_config import FetchConfig

from bix.twitter.preprocessing.preprocess import preprocess, tokenize_cleaned_tweets

# before you run this script, set the execution folder for this script to where the tokenizer and the model are saved
if __name__ == '__main__':
    # Script entry point: download tweets for a fixed set of hashtags, clean
    # and tokenize them, then build the sample matrix `x` and label list `y`.

    # These names are used below but are not provided by the import header of
    # this script, so they are imported here to keep the script runnable.
    from datetime import date, timedelta

    import numpy

    # step 1: fetch tweets (for categorizing hashtags)
    hashtags = [
        'brexit', 'lol'
    ]  # these should match the hashtags, the model was created with
    config = FetchConfig()
    # Read the current date once so both bounds describe the same day even if
    # the script happens to run across midnight.
    today = date.today()
    config.from_date = today  # fetch all tweets from today
    config.to_date = today + timedelta(days=1)  # to_date is exclusive
    config.max_tweets_per_fetch = 10
    tweets = download_tweets_twint(hashtags, config)

    # step 2: cleanup
    cleaned_tweets = preprocess(tweets)

    # step 3: tokenization (using the tokenizer created in the build_model.py script)
    tokenized_tweets = tokenize_cleaned_tweets(cleaned_tweets)

    # Each hashtag is encoded as its position in `hashtags`.
    encoded_categories = {hashtag: index for index, hashtag in enumerate(hashtags)}

    # One label per tokenized tweet, in the iteration order of
    # `tokenized_tweets`, eg. 0,0,0,1,1,1,1
    y = []
    for hashtag, hashtag_tweets in tokenized_tweets.items():
        # A distinct loop name keeps the downloaded `tweets` from being shadowed.
        y.extend(encoded_categories[hashtag] for _ in hashtag_tweets)

    # Stack the per-hashtag token arrays in the same order the labels were built.
    # NOTE(review): `x` and `y` are not consumed in the visible part of this
    # script - presumably they are fed to the loaded model; confirm.
    x = numpy.concatenate(list(tokenized_tweets.values()))
예제 #2
0
from datetime import date, timedelta

from bix.twitter.fetch.fetch_config import FetchConfig
from bix.twitter.fetch.twint_api.twint_fetcher import TwintFetcher

if __name__ == '__main__':
    # Script entry point: fetch tweets for a single hashtag over a window that
    # starts today and ends at tomorrow's date.
    one_day = timedelta(days=1)
    tags_to_fetch = ['brexit']

    fetch_config = FetchConfig()
    fetch_config.to_date = date.today() + one_day
    fetch_config.from_date = date.today()

    # Whatever fetch_many returns is not used by this script.
    TwintFetcher().fetch_many(tags_to_fetch, fetch_config)
예제 #3
0
from datetime import date, timedelta

from bix.twitter.fetch.fetch_config import FetchConfig
from bix.twitter.fetch.twint_api.twint_fetcher import TwintFetcher

if __name__ == '__main__':
    # Script entry point: fetch tweets for two hashtags over a window that
    # starts yesterday and ends at today's date.
    one_day = timedelta(days=1)
    tags_to_fetch = ['love', 'sad']

    fetch_config = FetchConfig()
    fetch_config.to_date = date.today()
    fetch_config.from_date = date.today() - one_day

    # Whatever fetch_many returns is not used by this script.
    TwintFetcher().fetch_many(tags_to_fetch, fetch_config)