Code Example #1
File: examples.py  Project: ArecRui/twitterresearch
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    This function reads a file with tweet IDs and then loads them
    through the API into the database. Prepare to wait quite a bit,
    depending on the size of the dataset.
    """
    ids_to_fetch = set()
    for line in open(idlist_file, "r"):
        # Remove newline character through .strip()
        # Convert to int since that's what the database uses
        ids_to_fetch.add(int(line.strip()))
    # Find a list of Tweets that we already have
    ids_in_db = set(t.id for t in database.Tweet.select(database.Tweet.id))
    # Sets have an efficient .difference() method that returns IDs only present
    # in the first set, but not in the second.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progressbar
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
Code Example #2
File: examples.py  Project: ArecRui/twitterresearch
def import_json(fi):
    """
    Load json data from a file into the database.
    """
    logging.warning("Loading tweets from json file {0}".format(fi))
    for line in open(fi, "rb"):
        data = json.loads(line.decode('utf-8'))
        database.create_tweet_from_dict(data)
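
import_json() assumes one JSON-encoded tweet per line (line-delimited JSON). A standalone sketch of just the parsing step, with "tweets.jsonl" as a placeholder file name and a counter standing in for the database call:

import json

count = 0
with open("tweets.jsonl", "rb") as f:   # placeholder file name
    for line in f:
        data = json.loads(line.decode("utf-8"))
        count += 1
print("Parsed {0} tweets".format(count))
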
Code Example #3
File: examples.py  Project: ArecRui/twitterresearch
def save_user_archive_to_database():
    """
    Fetch all available tweets for one user and save them to the database.
    """
    archive_generator = rest.fetch_user_archive("lessig")
    for page in archive_generator:
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    logging.warning(u"Wrote tweets from @lessig to database")
Code Example #4
def load_from_files(files, searchterm):
    """
    Load tweets from a list of json files into the database.
    files is a list of json file paths, searchterm is the search term used.
    """
    for file in files:
        print("File ", file)
        logger.info("file %s", file)
        for tweet in iterate_file(file):
            if tweet:
                database.create_tweet_from_dict(tweet, searchterm)
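
load_from_files() depends on an iterate_file() helper that is not part of this snippet. A hypothetical sketch of what such a helper could look like; the name, signature, and error handling here are assumptions, not the project's actual implementation:

import json
import logging

def iterate_file(path):
    # Hypothetical helper: yield one tweet dict per JSON line, skip blank
    # lines, and log (rather than raise on) lines that fail to parse.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                yield json.loads(line)
            except ValueError:
                logging.warning("Skipping malformed line in %s", path)
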
Code Example #5
def add_to_database(tweets, searchterm):
    """
    Save a list of tweet objects to the database and return the number
    of tweets that were stored successfully.
    """
    counter = 0
    print(len(tweets))
    for tweet in tweets:
        if tweet:
            data = json.loads(tweet.AsJsonString())
            t = mytools.create_tweet_from_dict(data, searchterm)
            if t:
                counter += 1
            else:
                logging.error("Did not save tweet %s" % data["id"])
    return counter
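
add_to_database() expects objects that expose an AsJsonString() method, such as Status objects from the python-twitter client, plus a project-specific mytools module. A hedged usage sketch; the credentials, search term, and count are placeholders and the python-twitter client is an assumption:

import twitter  # python-twitter client, assumed source of the Status objects

# Hypothetical driver: fetch search results and hand them to add_to_database().
api = twitter.Api(consumer_key="...", consumer_secret="...",
                  access_token_key="...", access_token_secret="...")
tweets = api.GetSearch(term="example search", count=100)  # placeholder query
saved = add_to_database(tweets, "example search")
print("Saved {0} of {1} tweets".format(saved, len(tweets)))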