def fetch_user_data(screen_name):
    """Store a Twitter user and their recent tweets, with embeddings.

    Looks the account up through ``twitter_api``, gets or creates the
    matching ``User`` row, requests 150 extended-mode timeline statuses,
    embeds every tweet text with one ``embed_sentences`` call (model
    ``"twitter"``) and gets or creates one ``Tweet`` row per status.

    Args:
        screen_name: Twitter handle handed to the API lookups.

    Returns:
        The result of ``redirect("/users")``, both on success and when the
        user lookup raises ``tweepy.error.TweepError``; a message is
        flashed first in either case.
    """
    print("FETCHING...", screen_name)

    # A failed lookup is reported to the visitor as a missing account.
    try:
        profile = twitter_api.get_user(screen_name)
    except tweepy.error.TweepError:
        flash(f"User '{screen_name}' does not exist!", "dark")
        return redirect("/users")

    # Get-or-create the user row, then mirror the profile fields onto it.
    record = User.query.get(profile.id)
    if not record:
        record = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(record, field, getattr(profile, field))
    db.session.add(record)
    db.session.commit()

    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES", len(timeline))

    # All texts go to Basilica in one call; results are consumed by position.
    texts = [entry.full_text for entry in timeline]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))
    print("EMBEDDINGS", len(vectors))

    for position, entry in enumerate(timeline):
        print(entry.full_text)
        print("----")
        tweet_row = Tweet.query.get(entry.id)
        if not tweet_row:
            tweet_row = Tweet(id=entry.id)
        tweet_row.user_id = entry.author.id
        tweet_row.full_text = entry.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    flash(f"User '{screen_name}' added successfully!", "dark")
    return redirect("/users")
def get_user(screen_name=None):
    """Fetch a Twitter user and timeline, persist both, and render them.

    Args:
        screen_name: Twitter handle passed to ``twitter_api_client``.

    Returns:
        Whatever ``render_template("user.html", ...)`` returns, given the
        stored ``User`` row and the fetched statuses.
    """
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)
    timeline = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES COUNT:", len(timeline))

    # Reuse the stored row when there is one, otherwise start a new one.
    account = User.query.get(twitter_user.id)
    if not account:
        account = User(id=twitter_user.id)
    account.screen_name = twitter_user.screen_name
    account.name = twitter_user.name
    account.location = twitter_user.location
    account.followers_count = twitter_user.followers_count
    db.session.add(account)
    db.session.commit()

    # One embedding call for every tweet text; results are matched by index.
    texts = [item.full_text for item in timeline]
    vectors = list(basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for slot, item in enumerate(timeline):
        print(item.full_text)
        print("----")
        stored = Tweet.query.get(item.id)
        if not stored:
            stored = Tweet(id=item.id)
        stored.user_id = item.author.id
        stored.full_text = item.full_text
        vector = vectors[slot]
        print(len(vector))
        stored.embedding = vector
        db.session.add(stored)
    db.session.commit()

    return render_template("user.html", user=account, tweets=timeline)
def fetch_user_data(screen_name):
    """Pull a Twitter user plus timeline and save both with embeddings.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print('FETCHING...', screen_name)

    remote_user = twitter_api.get_user(screen_name)

    # ``or`` falls through to a brand-new User when the query finds nothing
    # (e.g. ``None or 7`` evaluates to ``7``).
    local_user = User.query.get(remote_user.id) or User(id=remote_user.id)
    for attr in ("screen_name", "name", "location", "followers_count"):
        setattr(local_user, attr, getattr(remote_user, attr))
    db.session.add(local_user)
    db.session.commit()

    fetched = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print('STATUSES', len(fetched))

    # Every tweet text is embedded by a single embed_sentences call.
    bodies = [tweet.full_text for tweet in fetched]
    vectors = list(basilica_connection.embed_sentences(bodies, model="twitter"))
    print('EMBEDDINGS', len(vectors))

    for pos, tweet in enumerate(fetched):
        print(tweet.full_text)
        print("----")
        row = Tweet.query.get(tweet.id) or Tweet(id=tweet.id)
        row.user_id = tweet.author.id
        row.full_text = tweet.full_text
        vector = vectors[pos]
        print(len(vector))
        row.embedding = vector
        db.session.add(row)
    db.session.commit()

    return f'FETCHED {screen_name} OK'
def fetch_user_data(screen_name):
    """Save a Twitter user and their non-retweet statuses with embeddings.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The string ``Fetched <screen_name> OK``.
    """
    print("Fetching...", screen_name)

    source_user = twitter_api.get_user(screen_name)

    # Get-or-create the row keyed by the Twitter id.
    user_row = User.query.get(source_user.id)
    if not user_row:
        user_row = User(id=source_user.id)
    user_row.screen_name = source_user.screen_name
    user_row.name = source_user.name
    user_row.location = source_user.location
    user_row.followers_count = source_user.followers_count
    db.session.add(user_row)
    db.session.commit()

    # Retweets are excluded from the timeline request.
    posts = twitter_api.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
        include_rts=False,
    )
    print("Statuses ", len(posts))

    post_texts = [post.full_text for post in posts]
    vectors = list(
        basilica_connection.embed_sentences(post_texts, model="twitter")
    )
    # Prints the full embedding list, not just its length.
    print("Embeddings ", vectors)

    for idx, post in enumerate(posts):
        print(post.full_text)
        print("----")
        tweet_row = Tweet.query.get(post.id)
        if not tweet_row:
            tweet_row = Tweet(id=post.id)
        tweet_row.user_id = post.author.id
        tweet_row.full_text = post.full_text
        vector = vectors[idx]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return f"Fetched {screen_name} OK"
def fetch_user(screen_name=None):
    """Fetch a Twitter user and tweets, store them, and render the user page.

    Looks the account up through ``twitter_api_client``, gets or creates the
    ``User`` row, requests 150 extended-mode statuses with replies and
    retweets excluded, embeds every tweet text in one ``embed_sentences``
    call (model ``"twitter"``) and gets or creates one ``Tweet`` row per
    status.

    Args:
        screen_name: Twitter handle passed to the API lookups.

    Returns:
        Whatever ``render_template("user.html", ...)`` returns, given the
        stored ``User`` row and the fetched statuses.
    """
    print(screen_name)

    # No debugger hook here: a stray breakpoint() would block the request.
    twitter_user = twitter_api_client.get_user(screen_name)

    # Get existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    tweets = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
        exclude_replies=True,
        include_rts=False,
    )
    print("TWEETS COUNT:", len(tweets))

    # Embed all texts in one call; embeddings are matched to tweets by index.
    all_tweet_texts = [status.full_text for status in tweets]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(tweets):
        print(index)
        print(status.full_text)
        print("----")
        embedding = embeddings[index]
        # Get existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    print('path A')
    return render_template("user.html", user=db_user, tweets=tweets)
def fetch_user_data(screen_name):
    """Fetch a Twitter user and 150 statuses, then store them with embeddings.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print("FETCHING...", screen_name)

    api_user = twitter_api.get_user(screen_name)

    # Get-or-create the user row and copy the profile fields across.
    stored_user = User.query.get(api_user.id)
    if not stored_user:
        stored_user = User(id=api_user.id)
    stored_user.screen_name = api_user.screen_name
    stored_user.name = api_user.name
    stored_user.location = api_user.location
    stored_user.followers_count = api_user.followers_count
    db.session.add(stored_user)
    db.session.commit()

    recent = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES", len(recent))

    # A single embed_sentences call covers every tweet text.
    sentences = [s.full_text for s in recent]
    vectors = list(
        basilica_connection.embed_sentences(sentences, model="twitter")
    )
    print("EMBEDDINGS", len(vectors))

    for n, s in enumerate(recent):
        print(s.full_text)
        print("----")
        stored_tweet = Tweet.query.get(s.id)
        if not stored_tweet:
            stored_tweet = Tweet(id=s.id)
        stored_tweet.user_id = s.author.id
        stored_tweet.full_text = s.full_text
        vector = vectors[n]
        print(len(vector))
        stored_tweet.embedding = vector
        db.session.add(stored_tweet)
    db.session.commit()

    return f"FETCHED {screen_name} OK"
def fetch_user(screen_name=None):
    """Fetch a Twitter user and their tweets and save both to the database.

    Args:
        screen_name: Twitter handle passed to ``twitter_api_client``.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    profile = twitter_api_client.get_user(screen_name)

    # Existing row if present, otherwise a new one keyed by the Twitter id.
    user_row = User.query.get(profile.id)
    if not user_row:
        user_row = User(id=profile.id)
    for column in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, column, getattr(profile, column))
    db.session.add(user_row)
    db.session.commit()

    tweets = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("TWEETS COUNT:", len(tweets))

    # All texts are embedded together; the loop picks results by index.
    texts = [tweet.full_text for tweet in tweets]
    vectors = list(basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, tweet in enumerate(tweets):
        print(position)
        print(tweet.full_text)
        print("----")
        vector = vectors[position]
        tweet_row = Tweet.query.get(tweet.id)
        if not tweet_row:
            tweet_row = Tweet(id=tweet.id)
        tweet_row.user_id = tweet.author.id
        tweet_row.full_text = tweet.full_text
        tweet_row.embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
def fetch_user_data(screen_name):
    """Refresh a Twitter user and their 10 latest statuses in the database.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The result of ``redirect("/call_twitter")``, after flashing a
        success message that reports how many tweets were stored.
    """
    print("INFO: fetching twitter for inforation for: ", screen_name)

    remote = twitter_api.get_user(screen_name)

    # Grab the stored user or start a new row, then refresh its fields.
    local = User.query.get(remote.id)
    if not local:
        local = User(id=remote.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(local, field, getattr(remote, field))
    db.session.add(local)
    db.session.commit()
    print("INFO: update user information in the db for: ", screen_name)

    statuses = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=10
    )
    print("INFO: just recevied Twitter statuses number = ", len(statuses))

    texts = [status.full_text for status in statuses]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))
    print("INFO: just received tweet embeddings number = ", len(vectors))

    # Count the rows staged so the flash message can report the total.
    stored_count = 0
    print("INFO: just before look")
    for position, status in enumerate(statuses):
        row = Tweet.query.get(status.id)
        if not row:
            row = Tweet(id=status.id)
        row.user_id = status.author.id
        row.full_text = status.full_text
        row.embedding = vectors[position]
        db.session.add(row)
        stored_count += 1
    print(
        "INFO: storing tweets and embeddings in the database number = ",
        stored_count,
    )
    db.session.commit()

    flash(
        f'Just stored tweet and embedding information into the database! Number of updates: {stored_count}',
        "success",
    )
    return redirect("/call_twitter")
def get_user(screen_name=None):
    """Fetch a user and their original tweets, persist them, render the page.

    The timeline request excludes replies and retweets.

    Args:
        screen_name: Twitter handle passed to ``twitter_api_client``.

    Returns:
        Whatever ``render_template("user.html", ...)`` returns, given the
        stored ``User`` row and the fetched statuses.
    """
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)
    timeline = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
        exclude_replies=True,
        include_rts=False,
    )
    print("STATUSES COUNT:", len(timeline))

    # Get existing user from the db or initialize a new one.
    person = User.query.get(twitter_user.id)
    if not person:
        person = User(id=twitter_user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(person, field, getattr(twitter_user, field))
    db.session.add(person)
    db.session.commit()

    texts = [entry.full_text for entry in timeline]
    vectors = list(basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    # enumerate supplies the index used to pair each tweet with its vector.
    for i, entry in enumerate(timeline):
        print(entry.full_text)
        print("----")
        saved = Tweet.query.get(entry.id)
        if not saved:
            saved = Tweet(id=entry.id)
        saved.user_id = entry.author.id
        saved.full_text = entry.full_text
        vector = vectors[i]
        print(len(vector))
        saved.embedding = vector
        db.session.add(saved)
    db.session.commit()

    return render_template("user.html", user=person, tweets=timeline)
def create_user():
    """Create or refresh a user from the submitted form and render the page.

    Reads ``screen_name`` from ``request.form``, looks the account up via
    ``twitter_api_client``, gets or creates the ``User`` row, requests 150
    extended-mode statuses, embeds every tweet text in one
    ``embed_sentences`` call (model ``"twitter"``) and gets or creates one
    ``Tweet`` row per status.

    Returns:
        Whatever ``render_template("user.html", ...)`` returns, given the
        stored ``User`` row and the fetched statuses.
    """
    print("FORM DATA", dict(request.form))

    # Read the form field once; no throwaway User instance is built from it.
    screen_name = request.form["screen_name"]
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES COUNT:", len(statuses))

    # Get existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    all_tweet_texts = [status.full_text for status in statuses]
    # NOTE(review): basilica_api_client is *called* here, while
    # twitter_api_client is used as an object; confirm it is a factory.
    embeddings = list(
        basilica_api_client().embed_sentences(all_tweet_texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # Get existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    return render_template("user.html", user=db_user, tweets=statuses)
def get_user(screen_name=None):
    """Fetch a Twitter user and their original tweets and save both.

    Looks the account up through ``twitter_api``, gets or creates the
    ``User`` row, requests 150 extended-mode statuses with replies and
    retweets excluded, embeds every tweet text in one ``embed_sentences``
    call (model ``"twitter"``) and gets or creates one ``Tweet`` row per
    status.

    Args:
        screen_name: Twitter handle passed to the API lookups.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    twitter_user = twitter_api.get_user(screen_name)
    statuses = twitter_api.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=150,
        exclude_replies=True,
        include_rts=False,
    )

    # Get existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # One batched call; the connection is used directly, without an alias.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_connection.embed_sentences(all_tweet_texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # Get existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    return "OK"
def fetch_data(screen_name):
    """Fetch a Twitter user and 150 statuses, then store them with embeddings.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print("FETCHING..", screen_name)

    account = twitter_api.get_user(screen_name)

    # Get-or-create the user row, then mirror the profile fields.
    account_row = User.query.get(account.id)
    if not account_row:
        account_row = User(id=account.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(account_row, field, getattr(account, field))
    db.session.add(account_row)
    db.session.commit()

    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES", len(timeline))

    # Every text is embedded in one call to the Basilica connection.
    texts = [post.full_text for post in timeline]
    vectors = list(basilica_conn.embed_sentences(texts, model="twitter"))
    print("EMBEDDINGS", len(vectors))

    for offset, post in enumerate(timeline):
        print(post.full_text)
        print("----")
        post_row = Tweet.query.get(post.id)
        if not post_row:
            post_row = Tweet(id=post.id)
        post_row.user_id = post.author.id
        post_row.full_text = post.full_text
        vector = vectors[offset]
        print(len(vector))
        post_row.embedding = vector
        db.session.add(post_row)
    db.session.commit()

    return f"FETCHED {screen_name} OK"
def fetch_user_data(screen_name):
    """Stage a Twitter user and original tweets, then commit them together.

    The user row and all tweet rows are added to the session and written by
    one commit at the end.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The ``jsonify`` result for a dict holding the API user's ``_json``
        payload under ``'user'`` and the status count under ``'num_tweets'``.
    """
    print('Fetching...', screen_name)

    user = twitter_api.get_user(screen_name)

    # Staged only; the commit happens once after the tweets are staged too.
    user_row = User.query.get(user.id)
    if not user_row:
        user_row = User(id=user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(user, field))
    db.session.add(user_row)

    # Replies and retweets are excluded from the timeline request.
    statuses = twitter_api.user_timeline(
        screen_name,
        tweet_mode='extended',
        count=100,
        exclude_replies=True,
        include_rts=False,
    )
    print('Statuses:', len(statuses))

    texts = [status.full_text for status in statuses]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))
    print('Embeddings:', len(vectors))

    for position, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        tweet_row = Tweet.query.get(status.id)
        if not tweet_row:
            tweet_row = Tweet(id=status.id)
        tweet_row.user_id = status.author.id
        tweet_row.full_text = status.full_text
        vector = vectors[position]
        print(len(vector))
        tweet_row.embedding = vector
        db.session.add(tweet_row)

    db.session.commit()

    return jsonify({'user': user._json, 'num_tweets': len(statuses)})
def get_user(screen_name=None):
    """Fetch a Twitter user and 150 statuses and save both with embeddings.

    Args:
        screen_name: Twitter handle passed to ``twitter_api``.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    twitter_user = twitter_api.get_user(screen_name)
    timeline = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )

    # Get existing user from the db or initialize a new one.
    user_row = User.query.get(twitter_user.id)
    if not user_row:
        user_row = User(id=twitter_user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(twitter_user, field))
    db.session.add(user_row)
    db.session.commit()

    # One embed_sentences call for all texts; results are matched by index.
    texts = [entry.full_text for entry in timeline]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for i, entry in enumerate(timeline):
        print(entry.full_text)
        print("----")
        tweet_row = Tweet.query.get(entry.id)
        if not tweet_row:
            tweet_row = Tweet(id=entry.id)
        tweet_row.user_id = entry.author.id
        tweet_row.full_text = entry.full_text
        tweet_row.embedding = vectors[i]
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
def fetch_user_data(screen_name):
    """Store a Twitter user and their original tweets, then go to /tweets.

    Args:
        screen_name: Twitter handle to look up via ``twitter_api``.

    Returns:
        The result of ``redirect("/tweets")``.
    """
    # 1. Fetch user info.
    profile = twitter_api.get_user(screen_name)

    # 2. Get-or-create the user row and copy the profile fields onto it.
    profile_row = User.query.get(profile.id)
    if not profile_row:
        profile_row = User(id=profile.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(profile_row, field, getattr(profile, field))
    db.session.add(profile_row)
    db.session.commit()

    # 3. Fetch their tweets, with replies and retweets excluded.
    timeline = twitter_api.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=200,
        exclude_replies=True,
        include_rts=False,
    )

    # 4. Embed every tweet text with one call.
    texts = [item.full_text for item in timeline]
    vectors = list(basilica_connection.embed_sentences(texts, model="twitter"))

    # 5. Get-or-create one Tweet row per status, with its embedding.
    for position, item in enumerate(timeline):
        item_row = Tweet.query.get(item.id)
        if not item_row:
            item_row = Tweet(id=item.id)
        item_row.user_id = item.author.id
        item_row.full_text = item.full_text
        vector = vectors[position]
        print(len(vector))
        item_row.embedding = vector
        db.session.add(item_row)
    db.session.commit()

    return redirect("/tweets")
def get_user(screen_name=None):
    """Fetch a Twitter user and timeline, persist both, and render the page.

    Looks the account up through ``twitter_api_client``, gets or creates
    the ``User`` row, requests 150 extended-mode statuses, embeds every
    tweet text in one ``embed_sentences`` call (model ``"twitter"``) and
    gets or creates one ``Tweet`` row per status.

    Args:
        screen_name: Twitter handle passed to the API lookups.

    Returns:
        Whatever ``render_template("user.html", ...)`` returns, given the
        stored ``User`` row and the fetched statuses.
    """
    print(screen_name)

    # Look up the profile and the latest statuses for this screen name.
    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )
    print("STATUSES COUNT:", len(statuses))

    # Get existing user from the db or initialize a new one.
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # Embed every tweet text with a single batched call.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        # Get existing tweet from the db or initialize a new one.
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        # The batched result is the one stored, so no per-tweet
        # embed_sentence request is made here.
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()

    return render_template("user.html", user=db_user, tweets=statuses)
def create_user():
    """Create or refresh a user from the submitted form, then redirect home.

    Reads ``screen_name`` from ``request.form``, stores the Twitter profile
    and 150 extended-mode statuses with their embeddings.

    Returns:
        The result of ``redirect("/")``, after flashing a success message
        that names the stored screen name.
    """
    screen_name = request.form['screen_name']

    twitter_user = twitter_api_client.get_user(screen_name)
    timeline = twitter_api_client.user_timeline(
        screen_name, tweet_mode="extended", count=150
    )

    # Get the stored user or start a new row, then copy the profile fields.
    user_row = User.query.get(twitter_user.id)
    if not user_row:
        user_row = User(id=twitter_user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(twitter_user, field))
    db.session.add(user_row)
    db.session.commit()

    # Embed every tweet text with the "twitter" model in one call.
    texts = [entry.full_text for entry in timeline]
    vectors = list(basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    # The index pairs each status with the embedding at the same position.
    for position, entry in enumerate(timeline):
        print(entry.full_text)
        print("----")
        tweet_row = Tweet.query.get(entry.id)
        if not tweet_row:
            tweet_row = Tweet(id=entry.id)
        tweet_row.user_id = entry.author.id
        tweet_row.full_text = entry.full_text
        tweet_row.embedding = vectors[position]
        db.session.add(tweet_row)
    db.session.commit()

    flash(f"User '{user_row.screen_name}' created successfully!", "success")
    return redirect("/")
def fetch_user(screen_name=None):
    """Fetch a Twitter user and their original tweets and save both.

    Tweets are written through the ``tweet_id``, ``text`` and
    ``num_embedding`` attributes of ``Tweet``.

    Args:
        screen_name: Twitter handle passed to ``twitter_api_client``.

    Returns:
        The string ``"OK"``.
    """
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)

    # Get existing user from the db or initialize a new one.
    user_row = User.query.get(twitter_user.id)
    if not user_row:
        user_row = User(id=twitter_user.id)
    for field in ("screen_name", "name", "location", "followers_count"):
        setattr(user_row, field, getattr(twitter_user, field))
    db.session.add(user_row)
    db.session.commit()

    # Replies and retweets are excluded from the timeline request.
    timeline = twitter_api_client.user_timeline(
        screen_name,
        tweet_mode="extended",
        count=250,
        exclude_replies=True,
        include_rts=False,
    )
    print("STATUSES COUNT:", len(timeline))

    # Only the text of each status is sent for embedding, in one call.
    texts = [entry.full_text for entry in timeline]
    vectors = list(basilica_api_client.embed_sentences(texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(vectors))

    for position, entry in enumerate(timeline):
        vector = vectors[position]
        # NOTE(review): lookup is by primary key but creation uses
        # ``tweet_id=``; confirm against the Tweet model.
        tweet_row = Tweet.query.get(entry.id)
        if not tweet_row:
            tweet_row = Tweet(tweet_id=entry.id)
        tweet_row.user_id = entry.author.id
        tweet_row.text = entry.full_text
        tweet_row.num_embedding = vector
        db.session.add(tweet_row)
    db.session.commit()

    return "OK"
def fetch_user_data(screen_name):
    """Fetch a Twitter user and 50 statuses, then store them with embeddings.

    Looks the account up through ``twitter_api``, gets or creates the
    ``User`` row, requests 50 extended-mode statuses, embeds every tweet
    text in one ``embed_sentences`` call (model ``"twitter"``) and gets or
    creates one ``Tweet`` row per status.

    Args:
        screen_name: Twitter handle passed to the API lookups.

    Returns:
        The string ``FETCHED <screen_name> OK``.
    """
    print("FETCHING...", screen_name)

    user = twitter_api.get_user(screen_name)

    # Get existing user from the db or initialize a new one.
    db_user = User.query.get(user.id) or User(id=user.id)
    db_user.screen_name = user.screen_name
    db_user.name = user.name
    db_user.location = user.location
    db_user.followers_count = user.followers_count
    db.session.add(db_user)
    db.session.commit()

    statuses = twitter_api.user_timeline(
        screen_name, tweet_mode="extended", count=50
    )
    print("STATUSES", len(statuses))

    # Embed all tweet texts with one batched call.
    tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_connection.embed_sentences(tweet_texts, model="twitter")
    )
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id
        db_tweet.full_text = status.full_text
        # The batched embedding at the same index is the one stored, so no
        # per-tweet embed_sentence request is made here.
        db_tweet.embedding = embeddings[index]
        db.session.add(db_tweet)
    db.session.commit()

    return f"FETCHED {screen_name} OK"