Esempio n. 1
0
def _shared_friends_ids_followers_ids(db_path, identifiers, attach, sql, auth,
                                      ids, sleep, api_url, first_key,
                                      second_key):
    """Shared implementation for the friends-ids / followers-ids commands.

    For each identifier, pages through the ID list at ``api_url`` and records
    (first_key, second_key) pairs in the "following" table.
    """
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    for identifier in utils.resolve_identifiers(db, identifiers, attach, sql):
        # Make sure this user's profile is saved before recording relationships
        if ids:
            profile = utils.get_profile(db, session, identifier, None)
        else:
            profile = utils.get_profile(db, session, None, identifier)
        user_id = profile["id"]
        args = {"user_id" if ids else "screen_name": identifier}
        for id_batch in utils.cursor_paginate(session, api_url, args, "ids",
                                              5000, sleep):
            seen_at = datetime.datetime.utcnow().isoformat()
            rows = [{
                first_key: user_id,
                second_key: other_id,
                "first_seen": seen_at,
            } for other_id in id_batch]
            db["following"].insert_all(rows, ignore=True)
        time.sleep(sleep)
Esempio n. 2
0
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing,
                    silent):
    "Fetch tweets by their IDs"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    if skip_existing:
        # Drop any IDs that are already present in the tweets table
        already_saved = {
            row[0]
            for row in db.conn.execute("select id from tweets").fetchall()
        }
        identifiers = [
            identifier for identifier in identifiers
            if int(identifier) not in already_saved
        ]
    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
        return
    # Otherwise show a progress bar while importing
    total = len(identifiers)
    label = "Importing {:,} tweet{}".format(total, "" if total == 1 else "s")
    with click.progressbar(length=total, label=label) as bar:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            bar.update(len(batch))
Esempio n. 3
0
def list_members(db_path, identifiers, auth, ids):
    "Fetch lists - accepts one or more screen_name/list_slug identifiers"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    for list_identifier in identifiers:
        utils.fetch_and_save_list(db, session, list_identifier, ids)
Esempio n. 4
0
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    """Fetch lists belonging to specified users.

    With ``members`` set, also fetches and saves the membership of every
    list found. Sleeps 60s between users because of the rate limit.
    """
    auth = json.load(open(auth))
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    first = True
    for identifier in identifiers:
        if not first:
            # Rate limit is one per minute.
            # (Previously `first = False` lived inside this branch, so
            # `first` never flipped and the sleep was dead code.)
            time.sleep(60)
        first = False
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # full_name looks like "@owner/slug" — strip the LEADING "@".
                # rstrip("@") removed trailing characters only, so the "@"
                # survived and the identifier was wrong.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].lstrip("@")
                )
Esempio n. 5
0
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    resolved = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Fetch and persist the users in API-sized batches
    for user_batch in utils.fetch_user_batches(session, resolved, ids):
        utils.save_users(db, user_batch)
Esempio n. 6
0
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    # Stream matching tweets forever, saving each one as it arrives
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # One transaction per tweet
        with db.conn:
            utils.save_tweets(db, [tweet])
Esempio n. 7
0
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session, user_id, screen_name)
    favorites_iter = utils.fetch_favorites(session, db, user_id, screen_name,
                                           stop_after)
    # The progress bar wraps the iterator; save_tweets consumes it lazily
    with click.progressbar(
        favorites_iter,
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar, favorited_by=profile["id"])
Esempio n. 8
0
def _shared_timeline(db_path,
                     auth,
                     since,
                     since_id,
                     table,
                     api_url,
                     sleep=1,
                     since_type=None):
    """Shared implementation for timeline-style commands.

    Streams tweets from ``api_url``, saves them in chunks of 100, and records
    in ``table`` whose timeline each tweet came from.
    """
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session)
    since_key = profile["id"]
    # Expected upper bound used to size the progress bar
    expected_length = 800

    def flush(pending):
        # Save the tweets themselves ...
        utils.save_tweets(db, pending)
        # ... then record whose timeline they came from
        rows = [{"user": profile["id"], "tweet": tweet["id"]}
                for tweet in pending]
        db[table].insert_all(
            rows,
            pk=("user", "tweet"),
            foreign_keys=("user", "tweet"),
            replace=True,
        )

    timeline = utils.fetch_timeline(
        session,
        api_url,
        db,
        sleep=sleep,
        since=since,
        since_id=since_id,
        since_type=since_type,
        since_key=since_key,
    )
    with click.progressbar(
            timeline,
            length=expected_length,
            label="Importing tweets",
            show_pos=True,
    ) as bar:
        # Save them 100 at a time
        pending = []
        for tweet in bar:
            pending.append(tweet)
            if len(pending) >= 100:
                flush(pending)
                pending = []
        if pending:
            flush(pending)
Esempio n. 9
0
def _shared_friends_followers(db_path, identifiers, attach, sql, auth, ids,
                              silent, noun):
    """Shared implementation for the friends / followers commands.

    For each identifier, fetches that user's friends or followers in chunks
    and saves them, recording the relationship direction via follower_id /
    followed_id. Defaults to the authenticated user when no identifiers are
    given. Shows a progress bar unless ``silent``.
    """
    assert noun in ("friends", "followers")
    auth = json.load(open(auth))
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        # No identifiers provided: operate on the authenticated user
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        # Record the relationship direction when saving each fetched user.
        # (The old `fetched` list accumulated every user in memory but was
        # never read — removed to avoid unbounded growth on large accounts.)
        save_users_kwargs = {}
        if noun == "followers":
            save_users_kwargs["followed_id"] = user_id
        elif noun == "friends":
            save_users_kwargs["follower_id"] = user_id

        def go(update):
            # Fetch and save chunk-by-chunk, reporting progress via update()
            for users_chunk in utils.fetch_user_list_chunks(session,
                                                            user_id,
                                                            screen_name,
                                                            noun=noun):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if silent:
            go(lambda x: None)
        else:
            # Use the profile's follower/friend count to size the progress bar
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                    length=count,
                    label="Importing {:,} {} for @{}".format(
                        count, noun, screen_name),
            ) as bar:
                go(bar.update)
Esempio n. 10
0
def import_(db_path, paths):
    """
    Import data from a Twitter exported archive. Input can be the path to a zip
    file, a directory full of .js files or one or more direct .js files.

    Raises click.ClickException for any path that is none of the above.
    """
    db = utils.open_database(db_path)
    for filepath in paths:
        path = pathlib.Path(filepath)
        if path.suffix == ".zip":
            for filename, content in utils.read_archive_js(filepath):
                archive.import_from_file(db, filename, content)
        elif path.is_dir():
            # Import every .js file in this directory.
            # Use a distinct name — the original shadowed the outer
            # `filepath` loop variable.
            for js_path in path.glob("*.js"):
                # read_bytes() closes the handle; open(...).read() leaked it
                archive.import_from_file(db, js_path.name, js_path.read_bytes())
        elif path.suffix == ".js":
            archive.import_from_file(db, path.name, path.read_bytes())
        else:
            raise click.ClickException("Path must be a .js or .zip file or a directory")
Esempio n. 11
0
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    session = utils.session_for_auth(json.load(open(auth)))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # The streaming API needs user IDs, not screen names
    if ids:
        follow = identifiers
    else:
        follow = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming, saving each tweet in its own transaction
    for tweet in utils.stream_filter(session, follow=follow):
        if verbose:
            print(json.dumps(tweet, indent=2))
        with db.conn:
            utils.save_tweets(db, [tweet])
Esempio n. 12
0
def search(db_path, q, auth, since, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    # Pop control options out of kwargs; the rest are passed through as
    # search API parameters
    since_id = kwargs.pop("since_id", None)
    stop_after = kwargs.pop("stop_after", None)
    auth = json.load(open(auth))
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    # Build the search arguments, skipping options that were not provided
    search_args = {"q": q}
    for key, value in kwargs.items():
        if value is not None:
            search_args[key] = value

    # Stable hash of the full argument set, used as the since_key so that
    # identical searches can resume from where they left off
    args_hash = hashlib.sha1(
        json.dumps(search_args, sort_keys=True,
                   separators=(",", ":")).encode("utf8")).hexdigest()

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        db,
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
        since_id=since_id,
        since_type="search",
        since_key=args_hash,
    )
    chunk = []
    first = True

    # Ensure the search_runs table exists before inserting into it
    if not db["search_runs"].exists():
        db["search_runs"].create(
            {
                "id": int,
                "name": str,
                "args": str,
                "started": str,
                "hash": str
            },
            pk="id")

    def save_chunk(db, search_run_id, chunk):
        # Save the tweets themselves, then link each one back to the
        # search run that produced it
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].insert_all(
            [{
                "search_run": search_run_id,
                "tweet": tweet["id"]
            } for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=(
                ("search_run", "search_runs", "id"),
                ("tweet", "tweets", "id"),
            ),
            replace=True,
        )

    search_run_id = None
    for tweet in tweets:
        if first:
            # Record the search run lazily, on the first result, so empty
            # searches leave no search_runs row behind
            first = False
            search_run_id = (db["search_runs"].insert(
                {
                    "name": search_args["q"],
                    # Store the extra arguments, minus q/count which are
                    # captured elsewhere
                    "args": {
                        key: value
                        for key, value in search_args.items()
                        if key not in {"q", "count"}
                    },
                    "started": datetime.datetime.utcnow().isoformat(),
                    "hash": args_hash,
                },
                alter=True,
            ).last_pk)
        chunk.append(tweet)
        # Save in batches of 10
        if len(chunk) >= 10:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    # Flush any remaining tweets
    if chunk:
        save_chunk(db, search_run_id, chunk)
Esempio n. 13
0
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    auth = json.load(open(auth))
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            # --user_id alone becomes the sole identifier
            identifiers = [user_id]
        else:
            # Mixing --user_id with other identifiers only makes sense when
            # all identifiers are IDs
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        # `profile` set here is reused inside the loop below
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad screen names to the longest identifier so progress bar labels align
    format_string = (
        "@{:" + str(max(len(str(identifier))
                        for identifier in identifiers)) + "}")

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Authenticated-user case: profile was already fetched above,
            # re-read its saved row from the database
            profile = db["users"].get(profile["id"])
        # statuses_count sizes the progress bar for a full import
        expected_length = profile["statuses_count"]

        if since or since_id:
            # Incremental fetch: total count is unknown in advance
            expected_length = None

        with click.progressbar(
                utils.fetch_user_timeline(session,
                                          db,
                                          stop_after=stop_after,
                                          since_id=since_id,
                                          since=since,
                                          **kwargs),
                length=expected_length,
                label=format_string.format(profile["screen_name"]),
                show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            if chunk:
                utils.save_tweets(db, chunk)