Esempio n. 1
0
def fetch_user_data(screen_name):
    """Look up a Twitter user, then upsert the user and their tweets.

    Steps, in order: fetch the profile through ``twitter_api.get_user``,
    save it as a ``User`` row, request the timeline with ``count=150``,
    ask ``basilica_connection`` for one embedding per tweet text in a
    single call, and save every tweet with its embedding as a ``Tweet`` row.

    Args:
        screen_name: Handle passed to the Twitter client.

    Returns:
        The result of ``redirect("/users")``, both on success and when the
        profile lookup raises ``tweepy.error.TweepError``.
    """
    print("FETCHING...", screen_name)

    # Any TweepError from the lookup is reported to the user as "does not exist".
    try:
        profile = twitter_api.get_user(screen_name)
    except tweepy.error.TweepError:
        flash(f"User '{screen_name}' does not exist!", "dark")
        return redirect("/users")

    # Reuse the stored user row when there is one, otherwise start a new row.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150)
    print("STATUSES", len(timeline))

    # One batched embedding request for all tweet texts.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))
    print("EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print("----")
        tweet_row = Tweet.query.get(tweet.id)
        if not tweet_row:
            tweet_row = Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    flash(f"User '{screen_name}' added successfully!", "dark")
    return redirect("/users")
Esempio n. 2
0
def get_user(screen_name=None):
    """Store a Twitter user's profile and timeline, then render their page.

    The profile and the timeline (``count=150``) are fetched through
    ``twitter_api_client``; the user is upserted into ``User``; all tweet
    texts are embedded with one ``basilica_api_client.embed_sentences`` call
    and each tweet is upserted into ``Tweet`` with its embedding.

    Args:
        screen_name: Handle passed to the Twitter client (defaults to None).

    Returns:
        The result of ``render_template("user.html", ...)`` given the saved
        user row and the fetched statuses.
    """
    print(screen_name)

    profile = twitter_api_client.get_user(screen_name)
    timeline = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150)
    print("STATUSES COUNT:", len(timeline))

    # Existing row if present, otherwise a fresh one keyed by the Twitter id.
    found_user = User.query.get(profile.id)
    user_row = found_user if found_user else User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    # vectors[i] is paired with timeline[i] by position.
    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print("----")
        found_tweet = Tweet.query.get(tweet.id)
        tweet_row = found_tweet if found_tweet else Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return render_template("user.html", user=user_row, tweets=timeline)
Esempio n. 3
0
def fetch_user_data(screen_name):
    """Fetch a Twitter user and their tweets and save both to the database.

    Args:
        screen_name: Handle passed to ``twitter_api``.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print('FETCHING...', screen_name)

    account = twitter_api.get_user(screen_name)

    # ``a or b`` yields ``b`` when ``a`` is falsy, so a missing row (None)
    # falls through to a brand-new User with the same id.
    stored_user = User.query.get(account.id) or User(id=account.id)
    for attr in ("screen_name", "name", "location", "followers_count"):
        setattr(stored_user, attr, getattr(account, attr))
    db.session.add(stored_user)
    db.session.commit()

    tweets = twitter_api.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
    )
    print('STATUSES', len(tweets))

    # All texts go to Basilica in one request rather than one per tweet.
    texts = [tweet.full_text for tweet in tweets]
    embedded = basilica_connection.embed_sentences(texts, model="twitter")
    vectors = list(embedded)
    print('EMBEDDINGS', len(vectors))

    for slot, tweet in enumerate(tweets):
        print(tweet.full_text)
        print("----")
        stored_tweet = Tweet.query.get(tweet.id) or Tweet(id=tweet.id)
        stored_tweet.user_id = tweet.author.id
        stored_tweet.full_text = tweet.full_text
        vector = vectors[slot]
        print(len(vector))
        stored_tweet.embedding = vector
        db.session.add(stored_tweet)

    db.session.commit()

    return f'FETCHED {screen_name} OK'
def fetch_user_data(screen_name):
    """Save a Twitter user's profile and non-retweet timeline with embeddings.

    The timeline is requested with ``count=150`` and ``include_rts=False``.
    Tweet texts are embedded in one ``basilica_connection.embed_sentences``
    call and stored next to each tweet.

    Args:
        screen_name: Handle passed to ``twitter_api``.

    Returns:
        The string ``Fetched <screen_name> OK``.
    """
    print("Fetching...", screen_name)

    profile = twitter_api.get_user(screen_name)

    # Upsert the user row keyed by the Twitter id.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    timeline_options = {
        "tweet_mode": "extended",
        "count": 150,
        "include_rts": False,
    }
    timeline = twitter_api.user_timeline(screen_name, **timeline_options)
    print("Statuses ", len(timeline))

    texts = [entry.full_text for entry in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))
    # Note: this prints the embeddings themselves, not their count.
    print("Embeddings ", vectors)

    for position, entry in enumerate(timeline):
        print(entry.full_text)
        print("----")
        tweet_row = Tweet.query.get(entry.id)
        if not tweet_row:
            tweet_row = Tweet(id=entry.id)
        tweet_row.user_id = entry.author.id
        tweet_row.full_text = entry.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)

    db.session.commit()

    return f"Fetched {screen_name} OK"
Esempio n. 5
0
def fetch_user(screen_name=None):
    """Store a Twitter user and their tweets, then render the user page.

    The profile is fetched with ``twitter_api_client.get_user`` and upserted
    into ``User``. The timeline is requested with ``count=150``, replies
    excluded and retweets excluded. All tweet texts are embedded in one
    ``basilica_api_client.embed_sentences`` call and each tweet is upserted
    into ``Tweet`` together with its embedding.

    Args:
        screen_name: Handle passed to the Twitter client (defaults to None).

    Returns:
        The result of ``render_template("user.html", ...)`` given the saved
        user row and the fetched tweets.
    """
    print(screen_name)

    # FETCH DATA FROM TWITTER API
    # (The stray ``breakpoint()`` that used to follow this call has been
    # removed: it dropped every call into the debugger.)
    twitter_user = twitter_api_client.get_user(screen_name)

    # STORE TWITTER DATA IN DB
    # get existing user from the db or initialize a new one:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # FETCH TWEETS
    tweets = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
        exclude_replies=True,
        include_rts=False,
    )
    print("TWEETS COUNT:", len(tweets))

    # One batched request to Basilica instead of one request per tweet.
    all_tweet_texts = [status.full_text for status in tweets]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(tweets):
        print(index)
        print(status.full_text)
        print("----")
        # embeddings[index] belongs to tweets[index]; a length mismatch
        # surfaces as IndexError rather than silently dropping tweets.
        embedding = embeddings[index]

        # get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    print('path A')
    return render_template("user.html", user=db_user, tweets=tweets)
Esempio n. 6
0
def fetch_user_data(screen_name=None):
    """Fetch a Twitter user plus timeline and store both with embeddings.

    A client is obtained from ``api_client()``. The timeline is requested
    with ``count=300``, replies included and retweets included.

    Args:
        screen_name: Handle passed to the Twitter client (defaults to None).

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    client = api_client()
    profile = client.get_user(screen_name)
    timeline = client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=300,
        exclude_replies=False,
        include_rts=True,
    )
    print("STATUSES COUNT:", len(timeline))

    # STORE USER: reuse the stored row when present, else create one.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    # STORE TWEETS: embed every text in one request, then pair by position.
    texts = [item.full_text for item in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, item in enumerate(timeline):
        print(item.full_text)
        print("----")
        tweet_row = Tweet.query.get(item.id)
        if not tweet_row:
            tweet_row = Tweet(id=item.id)
        tweet_row.user_id = item.author.id
        tweet_row.full_text = item.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
Esempio n. 7
0
def fetch_user_data(screen_name):
    """Persist a Twitter user's profile and timeline tweets with embeddings.

    Args:
        screen_name: Handle passed to ``twitter_api``.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print("FETCHING...", screen_name)

    remote_user = twitter_api.get_user(screen_name)

    # Upsert the user row keyed by the Twitter id.
    existing_user = User.query.get(remote_user.id)
    local_user = existing_user if existing_user else User(id=remote_user.id)
    local_user.screen_name = remote_user.screen_name
    local_user.name = remote_user.name
    local_user.location = remote_user.location
    local_user.followers_count = remote_user.followers_count
    db.session.add(local_user)
    db.session.commit()

    posts = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150)
    print("STATUSES", len(posts))

    # Single batched embedding request for every tweet text.
    post_texts = [post.full_text for post in posts]
    post_vectors = list(
        basilica_connection.embed_sentences(post_texts, model="twitter"))
    print("EMBEDDINGS", len(post_vectors))

    for offset, post in enumerate(posts):
        print(post.full_text)
        print("----")
        existing_tweet = Tweet.query.get(post.id)
        local_tweet = existing_tweet if existing_tweet else Tweet(id=post.id)
        local_tweet.user_id = post.author.id
        local_tweet.full_text = post.full_text
        vector = post_vectors[offset]
        print(len(vector))
        local_tweet.embedding = vector
        db.session.add(local_tweet)

    db.session.commit()

    return f"FETCHED {screen_name} OK"
def fetch_user(screen_name=None):
    """Store a Twitter user and their timeline tweets with embeddings.

    Args:
        screen_name: Handle passed to ``twitter_api_client`` (defaults to
            None).

    Returns:
        The string ``"OK"``.
    """

    def save_tweet(tweet, vector):
        """Upsert one tweet row and attach its embedding."""
        row = Tweet.query.get(tweet.id)
        if not row:
            row = Tweet(id=tweet.id)
        row.user_id = tweet.author.id
        row.full_text = tweet.full_text
        row.embedding = vector
        db.session.add(row)

    print(screen_name)

    profile = twitter_api_client.get_user(screen_name)

    # Upsert the user row keyed by the Twitter id.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    timeline = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150)
    print("TWEETS COUNT:", len(timeline))

    # One batched Basilica request covers every tweet text.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(timeline):
        print(position)
        print(tweet.full_text)
        print("----")
        save_tweet(tweet, vectors[position])

    db.session.commit()
    return "OK"
Esempio n. 9
0
def fetch_user_data(screen_name):
    """Refresh a Twitter user's row and their latest tweets with embeddings.

    The timeline is requested with ``count=10``. After saving, a "success"
    flash message reports how many tweets were processed.

    Args:
        screen_name: Handle passed to ``twitter_api``.

    Returns:
        The result of ``redirect("/call_twitter")``.
    """
    print("INFO: fetching twitter for inforation for: ", screen_name)

    profile = twitter_api.get_user(screen_name)

    # Existing row if there is one, otherwise a new row with the Twitter id.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))

    db.session.add(user_row)
    db.session.commit()
    print("INFO: update user information in the db for: ", screen_name)

    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=10)
    print("INFO: just recevied Twitter statuses number = ", len(timeline))

    # One batched embedding request for all tweet texts.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))

    print("INFO: just received tweet embeddings number = ", len(vectors))

    # ``ctr`` is the 1-based position, so after the loop it equals the number
    # of tweets processed (and stays 0 for an empty timeline).
    ctr = 0
    print("INFO: just before look")
    for ctr, tweet in enumerate(timeline, start=1):
        tweet_row = Tweet.query.get(tweet.id)
        if not tweet_row:
            tweet_row = Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        tweet_row.embedding = vectors[ctr - 1]
        db.session.add(tweet_row)

    print("INFO: storing tweets and embeddings in the database number = ", ctr)
    db.session.commit()

    flash(
        f'Just stored tweet and embedding information into the database! Number of updates: {ctr}',
        "success")
    return redirect("/call_twitter")
Esempio n. 10
0
def create_user():
    """Store the Twitter user named in the submitted form, plus their tweets.

    Reads ``screen_name`` from ``request.form``, fetches the profile and the
    timeline (``count=150``) through ``api_client()``, upserts the user, and
    upserts every tweet with an embedding from ``basilica_connection``.

    Returns:
        The result of ``redirect("/user_list")``.
    """
    print("FORM DATA:", dict(request.form))
    handle = request.form["screen_name"]
    print(handle)

    client = api_client()
    profile = client.get_user(handle)
    timeline = client.user_timeline(handle, tweet_mode="extended", count=150)
    print("STATUSES COUNT:", len(timeline))

    # STORE USER: reuse the stored row, or create it if it is not in the DB.
    found_user = User.query.get(profile.id)
    user_row = found_user if found_user else User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    # STORE TWEETS: one batched embedding request, paired by position.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print("----")
        found_tweet = Tweet.query.get(tweet.id)
        tweet_row = found_tweet if found_tweet else Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return redirect(f"/user_list")
Esempio n. 11
0
def get_user(screen_name=None):
    """Store a Twitter user and their original tweets with embeddings.

    The timeline is requested with ``count=150``, replies excluded and
    retweets excluded. All tweet texts are embedded in one
    ``basilica_connection.embed_sentences`` call and each tweet is upserted
    into ``Tweet`` together with its embedding.

    Args:
        screen_name: Handle passed to ``twitter_api`` (defaults to None).

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)
    twitter_user = twitter_api.get_user(screen_name)
    statuses = twitter_api.user_timeline(screen_name,
                                         tweet_mode="extended",
                                         count=150,
                                         exclude_replies=True,
                                         include_rts=False)

    # get existing user from the db or initialize a new one:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # One batched request to Basilica instead of one request per tweet.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_connection.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # enumerate replaces the hand-maintained counter; embeddings[index]
    # belongs to statuses[index].
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()
    return "OK"
Esempio n. 12
0
def get_user(screen_name=None):
    """Store a Twitter user and their timeline tweets with embeddings.

    The profile and the timeline (``count=150``) are fetched through
    ``twitter_api_client``; the user is upserted into ``User``; all tweet
    texts are embedded in one ``basilica_api_client.embed_sentences`` call
    and each tweet is upserted into ``Tweet`` with its embedding.

    Args:
        screen_name: Handle passed to the Twitter client (defaults to None).

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(screen_name,
                                                tweet_mode="extended",
                                                count=150)

    print("STATUSES COUNT:", len(statuses))

    # get existing user from the db or initialize a new one
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    # Was ``followers_counts``: the misspelling only set an unrelated
    # attribute and never reached the followers_count field.
    db_user.followers_count = twitter_user.followers_count

    # Was ``db.session.add(db.user)``: the row lives in the local ``db_user``.
    db.session.add(db_user)
    db.session.commit()

    # One batched request to Basilica for every tweet text.
    all_tweets_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweets_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # embeddings[index] belongs to statuses[index].
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("-----")
        # get existing tweet from the db or initialize a new one
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    # The render_template return that used to follow was unreachable and
    # has been dropped; the function keeps returning "OK".
    return "OK"
Esempio n. 13
0
def get_user(screen_name=None):
    """Store a Twitter user and their timeline tweets with embeddings.

    Args:
        screen_name: Handle passed to ``twitter_api`` (defaults to None).

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    profile = twitter_api.get_user(screen_name)
    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150)

    # Reuse the stored user row when present, else start a new one.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    # A single batched request to Basilica covers all tweet texts.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print("----")
        tweet_row = Tweet.query.get(tweet.id)
        if not tweet_row:
            tweet_row = Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        tweet_row.embedding = vectors[position]
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
Esempio n. 14
0
def store_twitter_user_data(screen_name):
    """Upsert a Twitter user and their timeline tweets with embeddings.

    Args:
        screen_name: Handle passed to the client from ``api_client()``.

    Returns:
        A ``(db_user, statuses)`` tuple: the saved ``User`` row and the
        statuses returned by ``user_timeline``.
    """
    client = api_client()
    profile = client.get_user(screen_name)
    timeline = client.user_timeline(
        screen_name, tweet_mode="extended", count=150)

    # Find or create the database user.
    found_user = User.query.get(profile.id)
    user_row = found_user if found_user else User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    print("STATUS COUNT:", len(timeline))
    # One batched embedding request for all tweet texts.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(basilica_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print("----")

        # Find or create the database tweet.
        found_tweet = Tweet.query.get(tweet.id)
        tweet_row = found_tweet if found_tweet else Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return user_row, timeline
Esempio n. 15
0
def fetch_user_data(screen_name=None):
    """Store a Twitter user and their timeline tweets with embeddings.

    Args:
        screen_name: Handle passed to the client returned by
            ``twitter_api()`` (defaults to None).

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)
    client = twitter_api()
    profile = client.get_user(screen_name)
    timeline = client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
    )
    print("STATUSES COUNT:", len(timeline))

    # Upsert the user row keyed by the Twitter id.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))
    db.session.add(user_row)
    db.session.commit()

    # Embed all tweet texts in one request, then pair results by position.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS:", len(vectors))

    for position, tweet in enumerate(timeline):
        print(tweet.full_text)
        print('----')

        tweet_row = Tweet.query.get(tweet.id)
        if not tweet_row:
            tweet_row = Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text

        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
Esempio n. 16
0
def fetch_user_data(screen_name):
    """Save a Twitter user and their original tweets with embeddings.

    The timeline is requested with ``count=200``, replies excluded and
    retweets excluded.

    Args:
        screen_name: Handle passed to ``twitter_api``.

    Returns:
        The result of ``redirect("/tweets")``.
    """
    # 1. Fetch the profile.
    profile = twitter_api.get_user(screen_name)

    # 2. Upsert the user row.
    found_user = User.query.get(profile.id)
    user_row = found_user if found_user else User(id=profile.id)
    user_row.screen_name = profile.screen_name
    user_row.name = profile.name
    user_row.location = profile.location
    user_row.followers_count = profile.followers_count
    db.session.add(user_row)
    db.session.commit()

    # 3. Fetch the timeline.
    timeline = twitter_api.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=200,
        exclude_replies=True,
        include_rts=False,
    )

    # 4. Embed every tweet text in one request.
    texts = [tweet.full_text for tweet in timeline]
    vectors = list(
        basilica_connection.embed_sentences(texts, model="twitter"))

    # 5. Upsert each tweet with the embedding at the same position.
    for position, tweet in enumerate(timeline):
        found_tweet = Tweet.query.get(tweet.id)
        tweet_row = found_tweet if found_tweet else Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)

    db.session.commit()

    return redirect("/tweets")
Esempio n. 17
0
def get_user(screen_name=None):
    """Store a Twitter user and their timeline tweets, then render their page.

    The profile and the timeline (``count=150``) are fetched through
    ``twitter_api_client``; the user is upserted into ``User``; all tweet
    texts are embedded in one ``basilica_api_client.embed_sentences`` call
    and each tweet is upserted into ``Tweet`` with its embedding.

    Args:
        screen_name: Handle passed to the Twitter client (defaults to None).

    Returns:
        The result of ``render_template("user.html", ...)`` given the saved
        user row and the fetched statuses.
    """
    print(screen_name)

    # Fetch the profile and timeline for the given screen name. Replies and
    # retweets are not filtered out here.
    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(screen_name,
                                                tweet_mode="extended",
                                                count=150)
    print("STATUSES COUNT:", len(statuses))

    # Get the existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # Embed every tweet text with one batched request using the "twitter"
    # model.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # The loop previously also called embed_sentence() once per tweet and
    # then discarded the result; that redundant request has been removed.
    # embeddings[index] belongs to statuses[index].
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")

        # Get the existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text

        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding

        db.session.add(db_tweet)

    db.session.commit()

    return render_template("user.html", user=db_user, tweets=statuses)
Esempio n. 18
0
def create_user():
    """Store the Twitter user named in the submitted form, plus their tweets.

    Reads ``screen_name`` from ``request.form``, asks the Twitter client for
    the profile and for 150 timeline statuses, creates or updates the
    matching ``User`` row, embeds all tweet texts with a single Basilica
    request, creates or updates one ``Tweet`` row per status, and finally
    flashes a success message.

    Returns:
        The result of ``redirect("/")``.
    """
    handle = request.form["screen_name"]

    # Both Twitter lookups run before anything is written to the database.
    profile = twitter_api_client.get_user(handle)
    timeline = twitter_api_client.user_timeline(
        handle, tweet_mode="extended", count=150)

    # --- user table: reuse the existing row when there is one ---
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)

    # Copy the profile fields we persist straight across from Twitter.
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(profile, field))

    db.session.add(user_row)
    db.session.commit()

    # --- tweet table ---
    # One batched embedding request for every tweet text, using Basilica's
    # "twitter" model.
    texts = [status.full_text for status in timeline]
    vectors = list(
        basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    # Embeddings are matched to statuses by position in the timeline.
    for position, status in enumerate(timeline):
        print(status.full_text)
        print("----")

        tweet_row = Tweet.query.get(status.id)
        if not tweet_row:
            tweet_row = Tweet(id=status.id)

        tweet_row.user_id = status.author.id
        tweet_row.full_text = status.full_text
        tweet_row.embedding = vectors[position]
        db.session.add(tweet_row)

    # A single commit covers every tweet staged above.
    db.session.commit()

    flash(f"User '{user_row.screen_name}' created successfully!", "success")
    return redirect("/")
Esempio n. 19
0
def fetch_user(screen_name=None):
    """Fetch a Twitter user and their tweets and store both in the database.

    The user's profile is looked up through the Twitter client and written
    to the ``User`` table (updating the row if it already exists). Their
    timeline is then fetched without replies or retweets, every tweet text
    is embedded in a single Basilica request, and one ``Tweet`` row per
    status is created or updated.

    Args:
        screen_name: Twitter handle passed to the Twitter client.

    Returns:
        The literal string ``"OK"``.
    """
    print(screen_name)

    # Fetch the profile from the Twitter API.
    twitter_user = twitter_api_client.get_user(screen_name)

    # Get the existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count

    db.session.add(db_user)
    db.session.commit()

    # NOTE(review): Twitter documents a maximum `count` of 200 for
    # statuses/user_timeline -- confirm whether asking for 250 has any effect.
    tweets = twitter_api_client.user_timeline(screen_name,
                                              tweet_mode="extended",
                                              count=250,
                                              exclude_replies=True,
                                              include_rts=False)
    print("STATUSES COUNT:", len(tweets))

    # Status objects carry much more than text; keep only the text and embed
    # all of it with one request instead of one request per tweet.
    all_tweet_texts = [tweet.full_text for tweet in tweets]
    num_embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(num_embeddings))

    # Pair each status with the embedding at the same position.
    for status, num_embedding in zip(tweets, num_embeddings):
        # Get the existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(tweet_id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.text = status.full_text
        db_tweet.num_embedding = num_embedding
        db.session.add(db_tweet)

    # Commit once for the whole batch rather than once per tweet: one
    # transaction instead of len(tweets) of them, and the tweets are stored
    # all-or-nothing.
    db.session.commit()
    return "OK"
Esempio n. 20
0
def fetch_user_data(screen_name):
    """Fetch a Twitter user and their tweets and store both in the database.

    The user's profile is looked up through the Twitter client and written
    to the ``User`` table (updating the row if it already exists). Their
    timeline is then fetched, every tweet text is embedded in a single
    Basilica request, and one ``Tweet`` row per status is created or updated
    with its embedding.

    Args:
        screen_name: Twitter handle passed to the Twitter client.

    Returns:
        The string ``f"FETCHED {screen_name} OK"``.
    """
    print("FETCHING...", screen_name)

    #
    # fetch user info
    #
    user = twitter_api.get_user(screen_name)

    #
    # store user in database (reuse the existing row when there is one)
    #
    db_user = User.query.get(user.id) or User(id=user.id)
    db_user.screen_name = user.screen_name
    db_user.name = user.name
    db_user.location = user.location
    db_user.followers_count = user.followers_count

    db.session.add(db_user)
    db.session.commit()

    #
    # fetch their tweets
    #
    statuses = twitter_api.user_timeline(screen_name,
                                         tweet_mode="extended",
                                         count=50)
    print("STATUSES", len(statuses))

    #
    # fetch an embedding for each tweet with one batched request
    #
    tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_connection.embed_sentences(tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    #
    # store tweets in database (w/embeddings)
    #
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        # Use the embedding from the batched request above. A separate
        # embed_sentence() call per tweet used to be made here, but its
        # result was overwritten straight away, so it only cost an extra
        # network round trip per tweet.
        db_tweet.embedding = embeddings[index]
        db.session.add(db_tweet)

    # A single commit covers every tweet staged above.
    db.session.commit()

    return f"FETCHED {screen_name} OK"