def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    # NOTE: the docstring above is left untouched because it may be surfaced
    # verbatim as command help text.
    #
    # db_path       -- passed to utils.open_database
    # identifiers,
    # attach, sql   -- passed to utils.resolve_identifiers to get the tweet IDs
    # auth          -- path to a JSON file holding the API credentials
    # skip_existing -- when true, drop IDs already present in tweets.id
    # silent        -- when true, do not display a progress bar

    # Use a context manager so the credentials file is closed promptly
    # instead of waiting for garbage collection.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if skip_existing:
        existing_ids = {
            row[0] for row in db.conn.execute("select id from tweets")
        }
        # Identifiers are compared by their integer value against the stored IDs
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
        return

    # Do it with a progress bar
    count = len(identifiers)
    with click.progressbar(
        length=count,
        label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
    ) as bar:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            # Advance by the number of tweets actually saved in this batch
            bar.update(len(batch))
def save_chunk(db, chunk):
    """Save a chunk of tweets and link each one to a user in ``db[table]``.

    ``table`` and ``profile`` are not parameters; they are resolved from the
    surrounding scope at call time.
    """
    utils.save_tweets(db, chunk)
    # Record whose timeline they came from
    timeline_table = db[table]
    rows = []
    for tweet in chunk:
        rows.append({"user": profile["id"], "tweet": tweet["id"]})
    timeline_table.insert_all(
        rows,
        pk=("user", "tweet"),
        foreign_keys=("user", "tweet"),
        replace=True,
    )
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    # NOTE: the docstring above is left untouched because it may be surfaced
    # verbatim as command help text.
    #
    # db_path -- passed to utils.open_database
    # track   -- forwarded as the ``track`` argument of utils.stream_filter
    # auth    -- path to a JSON file holding the API credentials
    # verbose -- when true, print each received tweet as indented JSON

    # Use a context manager so the credentials file is closed promptly
    # instead of waiting for garbage collection.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)

    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own ``with db.conn`` block
        with db.conn:
            utils.save_tweets(db, [tweet])
def save_chunk(db, search_run_id, chunk):
    """Save a chunk of tweets and link each one to ``search_run_id``.

    The links are written to the ``search_runs_tweets`` table, keyed on the
    (search_run, tweet) pair, replacing any rows that already exist.
    """
    utils.save_tweets(db, chunk)
    # Record which search run produced them
    link_table = db["search_runs_tweets"]
    rows = []
    for tweet in chunk:
        rows.append({"search_run": search_run_id, "tweet": tweet["id"]})
    link_table.insert_all(
        rows,
        pk=("search_run", "tweet"),
        foreign_keys=(
            ("search_run", "search_runs", "id"),
            ("tweet", "tweets", "id"),
        ),
        replace=True,
    )
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    # NOTE: the docstring above is left untouched because it may be surfaced
    # verbatim as command help text.
    #
    # db_path     -- passed to utils.open_database
    # auth        -- path to a JSON file holding the API credentials
    # user_id,
    # screen_name -- passed to utils.get_profile and utils.fetch_favorites
    # stop_after  -- forwarded to utils.fetch_favorites

    # Use a context manager so the credentials file is closed promptly
    # instead of waiting for garbage collection.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session, user_id, screen_name)

    with click.progressbar(
        utils.fetch_favorites(session, db, user_id, screen_name, stop_after),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        # The progress bar wraps the fetched favorites, so passing it to
        # save_tweets advances the bar as tweets are consumed.
        utils.save_tweets(db, bar, favorited_by=profile["id"])
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # NOTE: the docstring above is left untouched because it may be surfaced
    # verbatim as command help text.
    #
    # db_path       -- passed to utils.open_database
    # identifiers,
    # attach, sql   -- passed to utils.resolve_identifiers to get the users
    # ids           -- when true, identifiers are used as user IDs directly;
    #                  otherwise they are converted from screen names
    # auth          -- path to a JSON file holding the API credentials
    # verbose       -- when true, print each received tweet as indented JSON

    # Use a context manager so the credentials file is closed promptly
    # instead of waiting for garbage collection.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)

    # Ensure we have user IDs, not screen names. The local is named
    # ``follow_ids`` so that it does not shadow this function's own name.
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)

    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own ``with db.conn`` block
        with db.conn:
            utils.save_tweets(db, [tweet])
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # NOTE: the docstring above is left untouched because it may be surfaced
    # verbatim as command help text.
    #
    # db_path          -- passed to utils.open_database
    # identifiers,
    # attach, sql      -- passed to utils.resolve_identifiers to get the users
    # auth             -- path to a JSON file holding the API credentials
    # ids              -- when true, identifiers are user IDs, else screen names
    # stop_after,
    # since, since_id  -- forwarded to utils.fetch_user_timeline
    # user_id,
    # screen_name      -- legacy single-user options (see below)
    #
    # Raises click.ClickException when the legacy options are combined with
    # identifiers of the other kind.

    # Use a context manager so the credentials file is closed promptly
    # instead of waiting for garbage collection.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # The only identifier is a user ID, so it must be looked up as
            # one; otherwise it would be sent to the API as a screen name.
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every progress bar label to the width of the longest identifier
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {"user_id" if ids else "screen_name": identifier}
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was already fetched above; read the stored users row
            profile = db["users"].get(profile["id"])

        # With a since/since_id filter the total is unknown up front, so the
        # progress bar is given no length.
        if since or since_id:
            expected_length = None
        else:
            expected_length = profile["statuses_count"]

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            # Flush whatever is left over after the final full chunk
            if chunk:
                utils.save_tweets(db, chunk)
def db(tweets):
    """Return an in-memory sqlite_utils database with ``tweets`` saved into it."""
    # Named ``database`` so the local does not shadow this function's own name
    database = sqlite_utils.Database(memory=True)
    utils.save_tweets(database, tweets)
    return database