Exemple #1
0
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing,
                    silent):
    "Fetch tweets by their IDs"
    # NOTE(review): the docstring above is deliberately left short and
    # unchanged -- this looks like a click command, where the docstring
    # doubles as the --help text (decorators are not visible here; confirm).
    #
    # db_path:       handed to utils.open_database().
    # identifiers,
    # attach, sql:   forwarded to utils.resolve_identifiers(), whose result
    #                is the list of tweet IDs to fetch.
    # auth:          path to a JSON file; its parsed content is passed to
    #                utils.session_for_auth().
    # skip_existing: when true, IDs already present in the "tweets" table
    #                are dropped before fetching.
    # silent:        when true, import without drawing a progress bar.

    # Use a context manager so the auth file handle is closed right away
    # instead of being left for the garbage collector.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if skip_existing:
        existing_ids = {
            row[0] for row in db.conn.execute("select id from tweets")
        }
        # Stored IDs are compared as integers, so coerce each identifier.
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
        return

    # Do it with a progress bar
    count = len(identifiers)
    with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(
                count, "" if count == 1 else "s"),
    ) as bar:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            # Advance by the number of tweets actually returned in the batch.
            bar.update(len(batch))
 def save_chunk(db, chunk):
     # Persist the tweets themselves first.
     utils.save_tweets(db, chunk)
     # Then link every saved tweet to the user whose timeline it came from.
     # `table` and `profile` are taken from the enclosing scope.
     link_table = db[table]
     links = [
         {"user": profile["id"], "tweet": status["id"]} for status in chunk
     ]
     link_table.insert_all(
         links,
         pk=("user", "tweet"),
         foreign_keys=("user", "tweet"),
         replace=True,
     )
Exemple #3
0
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    # NOTE(review): docstring kept byte-identical -- it is presumably used as
    # click --help text (decorators are not visible here; confirm).
    #
    # db_path: handed to utils.open_database().
    # track:   forwarded to utils.stream_filter(track=...).
    # auth:    path to a JSON file; its parsed content is passed to
    #          utils.session_for_auth().
    # verbose: when true, each received tweet is printed as indented JSON.

    # Close the auth file deterministically rather than leaking the handle.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block.
        with db.conn:
            utils.save_tweets(db, [tweet])
 def save_chunk(db, search_run_id, chunk):
     # Persist the tweets themselves first.
     utils.save_tweets(db, chunk)
     # Then associate every saved tweet with the search run that found it.
     link_table = db["search_runs_tweets"]
     links = [
         {"search_run": search_run_id, "tweet": status["id"]}
         for status in chunk
     ]
     link_foreign_keys = (
         ("search_run", "search_runs", "id"),
         ("tweet", "tweets", "id"),
     )
     link_table.insert_all(
         links,
         pk=("search_run", "tweet"),
         foreign_keys=link_foreign_keys,
         replace=True,
     )
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    # NOTE(review): docstring kept byte-identical -- it is presumably used as
    # click --help text (decorators are not visible here; confirm).
    #
    # db_path:     handed to utils.open_database().
    # auth:        path to a JSON file; its parsed content is passed to
    #              utils.session_for_auth().
    # user_id,
    # screen_name: forwarded to utils.get_profile() and
    #              utils.fetch_favorites() to select the user.
    # stop_after:  forwarded to utils.fetch_favorites().

    # Close the auth file deterministically rather than leaking the handle.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session, user_id, screen_name)
    with click.progressbar(
        utils.fetch_favorites(session, db, user_id, screen_name, stop_after),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        # The progress bar wraps the favorites iterable and is itself passed
        # to save_tweets as the collection of tweets to store.
        utils.save_tweets(db, bar, favorited_by=profile["id"])
Exemple #6
0
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # NOTE(review): docstring kept byte-identical -- it is presumably used as
    # click --help text (decorators are not visible here; confirm).
    #
    # db_path:       handed to utils.open_database().
    # identifiers,
    # attach, sql:   forwarded to utils.resolve_identifiers().
    # ids:           when true, the identifiers are used directly as the
    #                values to follow; otherwise they are converted with
    #                utils.user_ids_for_screen_names().
    # auth:          path to a JSON file; its parsed content is passed to
    #                utils.session_for_auth().
    # verbose:       when true, each received tweet is printed as JSON.

    # Close the auth file deterministically rather than leaking the handle.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names
    if ids:
        follow = identifiers
    else:
        follow = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block.
        with db.conn:
            utils.save_tweets(db, [tweet])
Exemple #7
0
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # NOTE(review): docstring kept byte-identical -- it is presumably used as
    # click --help text (decorators are not visible here; confirm).
    #
    # db_path:       handed to utils.open_database().
    # identifiers,
    # attach, sql:   forwarded to utils.resolve_identifiers().
    # auth:          path to a JSON file; its parsed content is passed to
    #                utils.session_for_auth().
    # ids:           when true, identifiers are treated as user IDs,
    #                otherwise as screen names.
    # stop_after,
    # since,
    # since_id:      forwarded to utils.fetch_user_timeline().
    # user_id,
    # screen_name:   legacy single-user options, merged into identifiers.
    #
    # Raises click.ClickException when --screen_name is combined with --ids,
    # or when --user_id is combined with other identifiers without --ids.

    # Close the auth file deterministically rather than leaking the handle.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every progress-bar label to the width of the longest identifier so
    # the bars line up when several users are imported.
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was already fetched above; re-read the stored row.
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        # With a since/since_id cut-off the total is not known up front, so
        # the progress bar is given no length.
        if since or since_id:
            expected_length = None

        with click.progressbar(
                utils.fetch_user_timeline(session,
                                          db,
                                          stop_after=stop_after,
                                          since_id=since_id,
                                          since=since,
                                          **kwargs),
                length=expected_length,
                label=format_string.format(profile["screen_name"]),
                show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            # Flush whatever is left over after the final full chunk.
            if chunk:
                utils.save_tweets(db, chunk)
def db(tweets):
    """Return an in-memory sqlite-utils database with ``tweets`` saved."""
    database = sqlite_utils.Database(memory=True)
    utils.save_tweets(database, tweets)
    return database