def harvest_single_user(maintask: MainTask, api: tweepy.API,
                        doc: cloudant.document, db: DBHelper) -> bool:
    """Harvest the tweets a user posted since the last recorded harvest.

    Pages backwards through ``api.user_timeline`` (newest page first),
    hands every returned tweet's JSON to ``db.add_tweet`` and, once an
    empty page is returned, records the newest tweet id seen and the
    current time in ``doc`` and saves it.

    Args:
        maintask: Object whose ``log(*args)`` method receives progress and
            error messages.
        api: Client whose ``user_timeline(**kwargs)`` returns a sequence of
            statuses exposing the raw tweet dict as ``_json``.
        doc: User document. ``doc["_id"]`` is sent as ``user_id`` and
            ``doc["last_harvest_tweet_id"]`` must be parseable by ``int``.
            ``last_harvest_tweet_id`` and ``last_harvest`` are overwritten
            on success, followed by ``doc.save()``.
        db: Store whose ``add_tweet(tweet_json)`` is called once per tweet.

    Returns:
        ``True`` when the timeline was paged to exhaustion and ``doc`` was
        saved. ``False`` when any ``user_timeline`` call raised; in that
        case ``doc`` is left untouched (tweets from earlier pages have
        already been passed to ``db.add_tweet``).
    """
    # The document is not modified inside the loop, so parse the stored id
    # once instead of on every page.
    last_harvested_id = int(doc["last_harvest_tweet_id"])
    newest_id = last_harvested_id
    oldest_id_last_page = None
    counter = 0

    while True:
        kwargs = {
            "user_id": doc["_id"],
            # since_id is exclusive: the API only returns tweets with an id
            # strictly greater than it. Passing the stored id unchanged
            # therefore never re-fetches that tweet, whereas adding 1 would
            # silently skip a tweet whose id is exactly stored id + 1.
            "since_id": last_harvested_id,
            "include_rts": "false",
        }
        if oldest_id_last_page is not None:
            # max_id is inclusive, so step one below the oldest tweet of
            # the previous page to avoid receiving it again.
            kwargs["max_id"] = str(oldest_id_last_page - 1)

        try:
            status_list = api.user_timeline(**kwargs)
        except Exception as e:
            # Any API failure is reported to the caller as "back off";
            # the document is deliberately not updated.
            maintask.log("user tweets: twitter api error, backoff", e)
            return False

        if not status_list:
            # An empty page means there is nothing older left to fetch.
            break

        ids = []
        for status in status_list:
            counter += 1
            t_json = status._json
            t_id = int(t_json["id_str"])
            ids.append(t_id)
            db.add_tweet(t_json)

        newest_id = max(newest_id, max(ids))
        oldest_id_last_page = min(ids)

        maintask.log("user tweets: ids from ", min(ids), "to", max(ids))

    doc["last_harvest_tweet_id"] = str(newest_id)
    doc["last_harvest"] = int(time.time())
    doc.save()

    maintask.log("user tweets: got tweets", counter)

    return True
# Example #2
# 0
def _handle_one_tweet(db: DBHelper, t_json: Dict[str, Any]):
    """Store one tweet and register its author.

    Passes ``t_json`` to ``db.add_tweet`` and then calls ``db.add_user``
    with the ``id_str`` and ``screen_name`` found under ``t_json["user"]``.
    """
    db.add_tweet(t_json)

    # Looked up only after the tweet is stored, so a tweet without a
    # "user" entry is still handed to add_tweet before the KeyError.
    author = t_json["user"]
    author_id = author["id_str"]
    author_name = author["screen_name"]
    db.add_user(author_id, author_name)