Example #1
0
def predict():
    """Train a per-request decision-tree pipeline on two users' stored tweet
    embeddings and render which user more likely wrote the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': ..., 'screen_name_b': ..., 'tweet_text': ...}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # NOTE(review): .one() raises if a screen name isn't in the database yet.
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # Inputs are tweet embeddings; targets are the owning user's screen name.
    embeddings = ([tweet.embedding for tweet in user_a_tweets]
                  + [tweet.embedding for tweet in user_b_tweets])
    labels = ([user_a.screen_name] * len(user_a_tweets)
              + [user_b.screen_name] * len(user_b_tweets))

    pipeline = make_pipeline(
        ce.OrdinalEncoder(),
        DecisionTreeClassifier(min_samples_leaf=3,
                               random_state=42,
                               max_depth=9))
    pipeline.fit(embeddings, labels)

    print("classifier training score:", pipeline.score(embeddings, labels))
    print("-----------------")
    print("MAKING A PREDICTION...")

    # Embed the submitted text the same way the stored tweets were embedded.
    example_embedding = basilica_api_client.embed_sentence(tweet_text,
                                                           model="twitter")
    result = pipeline.predict([example_embedding])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #2
0
def predict():
    """Fit a LogisticRegression on two users' tweet embeddings and render the
    screen name most likely to have authored the submitted tweet text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))  # handy for checking field names
    screen_name_a = request.form["screen_name_a"]
    print(screen_name_a)
    screen_name_b = request.form["screen_name_b"]
    print(screen_name_b)
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # h/t: https://flask-sqlalchemy.palletsprojects.com/en/2.x/queries/
    # Load both users (and, lazily, their stored tweets) from the database.
    user_a = User.query.filter_by(screen_name=screen_name_a).first()
    user_b = User.query.filter_by(screen_name=screen_name_b).first()
    user_a_tweets = user_a.tweets
    print(user_a_tweets)
    user_b_tweets = user_b.tweets
    print(user_b_tweets)
    print("FETCHED TWEETS", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X values / inputs: embeddings; Y values / labels: screen names.
    embeddings = ([tweet.embedding for tweet in user_a_tweets]
                  + [tweet.embedding for tweet in user_b_tweets])
    labels = ([screen_name_a] * len(user_a_tweets)
              + [screen_name_b] * len(user_b_tweets))

    classifier = LogisticRegression()
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # Embed the new text with the same twitter-tuned Basilica model.
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Train a per-request LogisticRegression on the stored embeddings of two
    users' tweets, then predict the likelier author of the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))

    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # NOTE(review): .one() raises NoResultFound for users not yet fetched.
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    embeddings = []
    labels = []
    # Pair every stored embedding with its owner's screen name.
    for owner in (user_a, user_b):
        for tweet in owner.tweets:
            embeddings.append(tweet.embedding)
            labels.append(owner.screen_name)

    classifier = LogisticRegression()
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # Embed the submitted text and classify it with the freshly-fit model.
    example_embedding = basilica_connection.embed_sentence(tweet_text,
                                                           model="twitter")
    result = classifier.predict([example_embedding])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #4
0
def twitoff_predict():
    """Predict which of two users more likely wrote the submitted tweet text.

    Trains a fresh multinomial LogisticRegression on both users' stored tweet
    embeddings (skipping rows with missing embeddings), embeds the new text
    with Basilica, and renders the predicted screen name.
    """
    print("INFO: begin predict route processing")
    # Grab form data
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(
        f"INFO: name1: {screen_name_a} name2: {screen_name_b} tweet: {tweet_text}"
    )

    # Grab tweet embeddings associated with the entered data
    tweet_embeddings = []
    tweet_labels = []
    # Fetch the objects for user a and user b from the database.
    # NOTE: .one() raises if a screen name hasn't been stored yet.
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()

    all_tweets = user_a.tweets + user_b.tweets

    for tweet in all_tweets:
        # Skip rows with a missing embedding value.
        # Fix: identity comparison for None per PEP 8 (was `== None`).
        if tweet.embedding is None or tweet.embedding == '':
            continue
        tweet_embeddings.append(tweet.embedding)
        tweet_labels.append(tweet.user.screen_name)

    print("EMBEDDINGS:", len(tweet_embeddings), "LABELS:", len(tweet_labels))

    # Define and fit a model
    print("INFO: generating a Logistic Regression model")
    classifier = LogisticRegression(random_state=0,
                                    solver="lbfgs",
                                    multi_class="multinomial")
    print("INFO: fitting the Logistic Regression model")
    print("INFO: just before the fit step")
    classifier.fit(tweet_embeddings, tweet_labels)
    print("INFO: just after the fit step")

    # Generate a prediction for the submitted text
    print("INFO: just before the basilica step")
    example_tweet_embedding = basilica_conn.embed_sentence(tweet_text,
                                                           model="twitter")
    print("INFO: just before the prediction step")
    result = classifier.predict([example_tweet_embedding])
    print("INFO: just after the prediction step")

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
def predict():
    """Fit a simple classifier on two users' stored tweet embeddings and
    render the screen name likelier to have authored the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # NOTE(review): .one() raises if either screen name is absent from the db;
    # consider a warning message / redirect in that case.
    user_a = User.query.filter_by(screen_name=screen_name_a).one()
    user_b = User.query.filter_by(screen_name=screen_name_b).one()
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    labels = []
    embeddings = []
    # Pair each stored embedding with its owner's screen name.
    for owner in (user_a, user_b):
        for tweet in owner.tweets:
            labels.append(owner.screen_name)
            embeddings.append(tweet.embedding)

    classifier = LogisticRegression()  # one throwaway model per request
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # Embed the input text with the same twitter-tuned Basilica model.
    example_embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #6
0
def twitoff_predict():
    """Predict which of two users likelier wrote the given tweet text by
    training a multinomial logistic regression on their tweet embeddings."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(screen_name_a, screen_name_b, tweet_text)

    # Gather training data from the db: embeddings as X, screen names as y.
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    combined = user_a.tweets + user_b.tweets

    tweet_embeddings = [tweet.embedding for tweet in combined]
    tweet_labels = [tweet.user.screen_name for tweet in combined]

    print("EMBEDDINGS", len(tweet_embeddings), "LABELS", len(tweet_labels))

    classifier = LogisticRegression(random_state=0,
                                    solver='lbfgs',
                                    multi_class="multinomial")
    classifier.fit(tweet_embeddings, tweet_labels)

    # Embed the new text the same way the stored tweets were, then classify.
    example_tweet_embedding = basilica_connection.embed_sentence(
        tweet_text, model="twitter")
    result = classifier.predict([example_tweet_embedding])
    print("RESULT:", result[0])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #7
0
def twitoff_predict():
    """Train a logistic regression on two users' stored tweet embeddings and
    render a page naming the likelier author of the submitted text."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print(screen_name_a, screen_name_b, tweet_text)

    # Build the training set from both users' tweets.
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    combined_tweets = user_a.tweets + user_b.tweets

    # Embeddings with the corresponding owner screen names.
    tweet_embeddings = [tweet.embedding for tweet in combined_tweets]
    tweet_labels = [tweet.user.screen_name for tweet in combined_tweets]

    print('EMBEDDINGS:', len(tweet_embeddings), 'LABELS:', len(tweet_labels))

    classifier = LogisticRegression(random_state=42, solver='lbfgs', multi_class='multinomial')
    classifier.fit(tweet_embeddings, tweet_labels)

    # Embed the new tweet text; wrap it in a list because predict() expects
    # a 2-D input (one row per sample).
    example_tweet_embedding = basilica_connection.embed_sentence(tweet_text, model='twitter')
    result = classifier.predict([example_tweet_embedding])
    print('RESULT:', result[0])

    return render_template('prediction_results.html',
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #8
0
def predict():
    """Render which of two users more plausibly authored the posted text,
    using a LogisticRegression fit on their stored tweet embeddings."""
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # Load both users (with their stored tweets) from the database.
    user_a = User.query.filter_by(screen_name=screen_name_a).first()
    user_b = User.query.filter_by(screen_name=screen_name_b).first()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("fetched tweets", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # x values / inputs: embeddings; y values / labels: screen names.
    embeddings = ([tweet.embedding for tweet in user_a_tweets]
                  + [tweet.embedding for tweet in user_b_tweets])
    labels = ([screen_name_a] * len(user_a_tweets)
              + [screen_name_b] * len(user_b_tweets))

    classifier = LogisticRegression()
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")
    # Embed the submitted text with the same twitter-tuned Basilica model.
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #9
0
def get_user(screen_name=None):
    """Fetch a Twitter user and their recent tweets, embed every tweet with
    Basilica, and upsert the user and tweets into the database.

    Fix: the original called `embed_sentence` once per tweet inside the loop
    (its own TODO flagged this); all texts are now embedded with a single
    `embed_sentences` batch request, saving one API round-trip per tweet.
    """
    print(screen_name)

    twitter_user = twitter_api_client.get_user(screen_name)
    statuses = twitter_api_client.user_timeline(screen_name,
                                                tweet_mode="extended",
                                                count=150,
                                                exclude_replies=True,
                                                include_rts=False)
    print("STATUSES COUNT:", len(statuses))

    # get existing user from the db or initialize a new one:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count
    db.session.add(db_user)
    db.session.commit()

    # Embed all tweet texts in one batch request.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # Statuses and embeddings are parallel sequences; pair them with zip.
    for status, embedding in zip(statuses, embeddings):
        print(status.full_text)
        print("----")
        # get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        print(len(embedding))
        db_tweet.embedding = embedding
        db.session.add(db_tweet)
    db.session.commit()
    return "OK"
Example #10
0
def twitoff_prediction():
    """Handle the prediction form: train a LogisticRegression on both users'
    tweet embeddings and render the most likely author of the new text."""
    # Echo the submitted form data; useful for checking the field names.
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    screen_name_most_likely = "TODO"  # placeholder, overwritten below

    ## TRAIN MODEL:

    # Instantiate the model.
    model = LogisticRegression()

    # Look the users up the same way twitter_routes get_user() does.
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()

    # Pair each stored embedding with its author's screen name.
    embeddings = []
    labels = []
    for tweet in user_a.tweets + user_b.tweets:
        embeddings.append(tweet.embedding)  # .embedding comes from the Tweet model
        labels.append(tweet.user.screen_name)

    model.fit(embeddings, labels)

    ## MAKE PREDICTION
    example_embedding = basilica_connection.embed_sentence(tweet_text,
                                                           model="twitter")
    result = model.predict([example_embedding])
    screen_name_most_likely = result[0]

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
Example #11
0
def twitoff_prediction():
    """Train a per-request model (embeddings -> screen names) and render the
    predicted author of the submitted tweet text."""
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    # TRAIN THE MODEL
    # inputs: embeddings for each tweet; labels: screen name for each tweet
    model = LogisticRegression(max_iter=1000)

    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()

    combined = user_a.tweets + user_b.tweets
    embeddings = [tweet.embedding for tweet in combined]
    labels = [tweet.user.screen_name for tweet in combined]

    model.fit(embeddings, labels)

    # MAKE PREDICTION on the Basilica embedding of the new text.
    example_embedding = basilica_connection.embed_sentence(tweet_text, model="twitter")
    result = model.predict([example_embedding])
    screen_name_most_likely = result[0]

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
Example #12
0
def tweetoff_prediction_results():
    """Predict which of two users likelier wrote the submitted text and
    render the results page."""
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    model = LogisticRegression(max_iter=1000)

    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()

    # Label each stored embedding with its owner's screen name.
    embeddings = []
    labels = []
    for owner in (user_a, user_b):
        for tweet in owner.tweets:
            labels.append(owner.screen_name)
            embeddings.append(tweet.embedding)

    model.fit(embeddings, labels)

    # Embed the new text and classify it.
    example_embedding = basilica_connection.embed_sentence(tweet_text,
                                                           model="twitter")
    result = model.predict([example_embedding])

    screen_name_most_likely = result[0]

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=screen_name_most_likely)
Example #13
0
def fetch_user_data(screen_name):
    """Fetch *screen_name*'s profile and recent tweets from Twitter, embed the
    tweets with Basilica in one batch request, and upsert everything into the
    database.

    Fix: the original issued an extra per-tweet `embed_sentence` call whose
    result was immediately overwritten by `embeddings[index]` — pure wasted
    work and one API round-trip per tweet. The batch result is used directly.
    """
    print("FETCHING...", screen_name)

    #
    # fetch user info
    #
    user = twitter_api.get_user(screen_name)

    #
    # store user in database (insert or update)
    #
    db_user = User.query.get(user.id) or User(id=user.id)
    db_user.screen_name = user.screen_name
    db_user.name = user.name
    db_user.location = user.location
    db_user.followers_count = user.followers_count

    db.session.add(db_user)
    db.session.commit()

    #
    # fetch their tweets
    #
    statuses = twitter_api.user_timeline(screen_name,
                                         tweet_mode="extended",
                                         count=50)
    print("STATUSES", len(statuses))

    #
    # fetch embedding for each tweet in a single batch request (list of lists)
    #
    tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_connection.embed_sentences(tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    #
    # store tweets in database (w/embeddings)
    #
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text
        # Batch embeddings are parallel to statuses, so index i matches.
        db_tweet.embedding = embeddings[index]
        db.session.add(db_tweet)

    db.session.commit()

    return f"FETCHED {screen_name} OK"
Example #14
0
def predict():
    """Predict which of two stored users more likely wrote *tweet_text*.

    Trains a LogisticRegression on both users' stored tweet embeddings,
    embeds the submitted text with Basilica, and renders the predicted
    screen name. Returns a 404 message if either user has not been fetched
    into the database yet.
    """
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]
    print(screen_name_a, screen_name_b, tweet_text)

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    user_a = User.query.filter_by(screen_name=screen_name_a).one_or_none()
    user_b = User.query.filter_by(screen_name=screen_name_b).one_or_none()

    # Fix: one_or_none() returns None for an unknown user, and the original
    # code then crashed with "'NoneType' object has no attribute 'tweets'"
    # (the bug its own comments noted). Guard explicitly and respond cleanly.
    if user_a is None or user_b is None:
        missing = screen_name_a if user_a is None else screen_name_b
        return f"User '{missing}' not found in the database. Please fetch their tweets first.", 404

    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a.tweets))
    print("USER B", user_b.screen_name, len(user_b.tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # Inputs: tweet embeddings. Labels: the owning user's screen name.
    embeddings = []
    labels = []

    for tweet in user_a_tweets:
        labels.append(user_a.screen_name)
        embeddings.append(tweet.embedding)

    for tweet in user_b_tweets:
        labels.append(user_b.screen_name)
        embeddings.append(tweet.embedding)

    classifier = LogisticRegression()
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")

    # Embed the input tweet with the same twitter-tuned Basilica model.
    example_embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])

    # Direct the user to the prediction results page.
    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0])
Example #15
0
def get_user(screen_name=None):
    """Fetch a Twitter user plus their latest tweets, embed each tweet with
    Basilica, upsert everything into the database, and render the user page.

    Fix: the original made a per-tweet `embed_sentence` request whose result
    was immediately overwritten by the batch `embed_sentences` result — one
    wasted Basilica round-trip per tweet. The batch embeddings are now used
    directly, and the manual counter is replaced by enumerate().
    """
    print(screen_name)

    # Look the user's profile up via the Twitter API.
    twitter_user = twitter_api_client.get_user(screen_name)

    # Collect the 150 latest tweets. Replies/retweets are NOT excluded here,
    # so the count is comparable across users.
    statuses = twitter_api_client.user_timeline(screen_name,
                                                tweet_mode="extended",
                                                count=150)
    print("STATUSES COUNT:", len(statuses))

    ## ADD USER TABLE INFORMATION TO DB

    # Get existing user from the db or initialize a new one if it doesn't exist yet:
    db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
    db_user.screen_name = twitter_user.screen_name
    db_user.name = twitter_user.name
    db_user.location = twitter_user.location
    db_user.followers_count = twitter_user.followers_count

    # Add and commit changes to the database user table.
    db.session.add(db_user)
    db.session.commit()

    # ADD TWEETS TO TWEET TABLE IN DB

    # Embed all tweet texts with a single Basilica batch request.
    all_tweet_texts = [status.full_text for status in statuses]
    embeddings = list(
        basilica_api_client.embed_sentences(all_tweet_texts, model="twitter"))
    print("NUMBER OF EMBEDDINGS", len(embeddings))

    # Store each tweet (with its embedding) in the database.
    for index, status in enumerate(statuses):
        print(status.full_text)
        print("----")

        # Get existing tweet from the db or initialize a new one:
        db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)

        db_tweet.user_id = status.author.id  # or db_user.id
        db_tweet.full_text = status.full_text

        # Batch embeddings are parallel to statuses, so index i matches.
        embedding = embeddings[index]
        print(len(embedding))
        db_tweet.embedding = embedding

        db.session.add(db_tweet)

    # Commit changes to the database table.
    db.session.commit()

    return render_template("user.html", user=db_user,
                           tweets=statuses)  # tweets=db_tweets
Example #16
0
def _get_or_fetch_user(screen_name):
    """Return the stored User for *screen_name*, fetching their tweets
    from Twitter first if the user is not in the database yet."""
    user = User.query.filter_by(screen_name=screen_name).first()
    if user is None:
        fetch_user(screen_name)
        print("User ", screen_name, "fetched")
        user = User.query.filter_by(screen_name=screen_name).first()
    return user


def predict():
    """Predict which of two users more likely authored the given tweet text.

    Expects form fields ``screen_name_a``, ``screen_name_b`` and
    ``tweet_text``. Ensures both users (and their tweet embeddings) exist in
    the database, trains a logistic regression on the stored embeddings,
    embeds the example text with the same Basilica model, and renders the
    results page with the winning screen name and that user's picture.
    """
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # h/t: https://flask-sqlalchemy.palletsprojects.com/en/2.x/queries/

    # Get the embeddings from the database, or fetch from Twitter first.
    # BUG FIX: the original printed screen_name_a after fetching user B;
    # the shared helper prints the name it actually fetched.
    user_a = _get_or_fetch_user(screen_name_a)
    print("user_a", user_a)

    user_b = _get_or_fetch_user(screen_name_b)
    print("user_b", user_b)

    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("FETCHED TWEETS", len(user_a_tweets), len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")

    classifier = LogisticRegression()
    # X values / inputs: embeddings
    # Y values / labels: screen_names

    embeddings = []
    labels = []

    for tweet in user_a_tweets:
        embeddings.append(tweet.embedding)
        labels.append(screen_name_a)  # or tweet.user.screen_name

    for tweet in user_b_tweets:
        embeddings.append(tweet.embedding)
        labels.append(screen_name_b)  # or tweet.user.screen_name

    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")

    # Embed the example text with the same model used for the training data.
    embedding = basilica_api_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([embedding])
    image_screen_name = result[0]
    image_user = User.query.filter_by(screen_name=image_screen_name).first()
    image = image_user.picture

    return render_template("prediction_results.html",
                           screen_name_a=screen_name_a,
                           screen_name_b=screen_name_b,
                           tweet_text=tweet_text,
                           screen_name_most_likely=result[0],
                           image=image)
Example #17
0
def predict():
    """Predict which of two users more likely authored the given tweet text.

    Expects form fields ``screen_name_a``, ``screen_name_b`` and
    ``tweet_text``. Trains a logistic regression on the two users' stored
    tweet embeddings, embeds the example text with the same Basilica model,
    and renders the results page with the most likely screen name.
    """
    print("PREDICT ROUTE...")
    print("FORM DATA:", dict(request.form))
    #> {'screen_name_a': 'elonmusk', 'screen_name_b': 's2t2', 'tweet_text': 'Example tweet text here'}
    screen_name_a = request.form["screen_name_a"]
    screen_name_b = request.form["screen_name_b"]
    tweet_text = request.form["tweet_text"]

    print("-----------------")
    print("FETCHING TWEETS FROM THE DATABASE...")
    # todo: wrap in a try block in case the users don't exist in the database
    # (.one() raises if there is no matching row)
    user_a = User.query.filter(User.screen_name == screen_name_a).one()
    user_b = User.query.filter(User.screen_name == screen_name_b).one()
    user_a_tweets = user_a.tweets
    user_b_tweets = user_b.tweets
    print("USER A", user_a.screen_name, len(user_a_tweets))
    print("USER B", user_b.screen_name, len(user_b_tweets))

    print("-----------------")
    print("TRAINING THE MODEL...")
    # X values / inputs: the tweet embeddings
    # y values / labels: the owning user's screen name
    embeddings = []
    labels = []
    for tweet in user_a_tweets:
        labels.append(user_a.screen_name)
        embeddings.append(tweet.embedding)

    for tweet in user_b_tweets:
        labels.append(user_b.screen_name)
        embeddings.append(tweet.embedding)

    # todo: balance the classes (same number of tweets per user) before fitting
    classifier = LogisticRegression(random_state=0, solver='lbfgs') # for example
    classifier.fit(embeddings, labels)

    print("-----------------")
    print("MAKING A PREDICTION...")

    # Embed the example text with the same model used for the training data.
    # NOTE(review): the other examples in this file use `basilica_api_client`;
    # confirm `basilica_client` is the name actually imported for this route.
    example_embedding = basilica_client.embed_sentence(tweet_text, model="twitter")
    result = classifier.predict([example_embedding])

    return render_template("results.html",
        screen_name_a=screen_name_a,
        screen_name_b=screen_name_b,
        tweet_text=tweet_text,
        screen_name_most_likely=result[0]
    )