def update_user_histories(active_users):
    """
    Append newly observed per-subreddit comment counts to the HISTORY table.

    For each user, the most recent QUERY_END_DATE stored in HISTORY is looked
    up. Users with no stored rows are queried from GLOBAL_START_DATE until
    today; users whose latest stored end date is not today are queried from
    that date until today; users already queried today are skipped. The
    retrieved comments are aggregated into one row per (USER, SUBREDDIT)
    holding COMMENT_COUNT, QUERY_START_DATE and QUERY_END_DATE, and appended
    to HISTORY.

    Args:
        active_users (sized iterable of str): Usernames to update. Must
            support len() because it is passed as the progress-bar total.

    Returns:
        None
    """
    ## Get Today's Date (ISO String, Same Format Written To QUERY_END_DATE)
    today = datetime.now().date().isoformat()
    ## Latest Stored Query End Date For A Single User
    ## NOTE(review): the "?" placeholder assumes `sql` is the sqlite3 module
    ## (qmark paramstyle) -- confirm against the file's imports.
    ## DESC is required so LIMIT 1 yields the most recent end date; ISO date
    ## strings sort chronologically.
    max_date_command = """
    SELECT USER, QUERY_END_DATE
    FROM HISTORY
    WHERE USER = ?
    ORDER BY QUERY_END_DATE DESC
    LIMIT 1;"""
    ## Initialize Connection
    user_history_con = sql.connect(USER_HISTORY_DB_PATH)
    try:
        cursor = user_history_con.cursor()
        ## Get User Query Dates
        query_dates = {}
        for user in tqdm(active_users,
                         total=len(active_users),
                         desc="Query Periods",
                         file=sys.stdout):
            result = cursor.execute(max_date_command, (user,)).fetchall()
            if not result:
                query_dates[user] = (GLOBAL_START_DATE, today)
            elif result[0][1] != today:
                query_dates[user] = (result[0][1], today)
        ## Initialize Reddit Wrapper
        reddit = RedditData()
        ## Query Comment History
        user_comment_histories = []
        for user, (start, stop) in tqdm(query_dates.items(),
                                        total=len(query_dates),
                                        file=sys.stdout,
                                        desc="User Histories"):
            df = reddit.retrieve_author_comments(user,
                                                 start_date=start,
                                                 end_date=stop)
            try:
                subreddit_counts = df.groupby(
                    ["author"])["subreddit"].value_counts().rename(
                        "COMMENT_COUNT").reset_index()
            except (AttributeError, KeyError):
                ## Retrieval returned a non-DataFrame (e.g. None) or a frame
                ## without the expected columns. Skip this user so a stale
                ## frame from a previous iteration is never re-appended.
                print('failed user:', user)
                continue
            subreddit_counts["QUERY_START_DATE"] = start
            subreddit_counts["QUERY_END_DATE"] = stop
            subreddit_counts.rename(columns={
                "author": "USER",
                "subreddit": "SUBREDDIT"
            },
                                    inplace=True)
            user_comment_histories.append(subreddit_counts)
        ## Update Database
        if user_comment_histories:
            combined_histories = pd.concat(
                user_comment_histories).reset_index(drop=True)
            combined_histories.to_sql(
                name="HISTORY",
                con=user_history_con,
                if_exists="append",
                index=False,
            )
        user_history_con.commit()
    finally:
        ## Close Connection (Even If A Query Or Write Failed)
        user_history_con.close()
# Example #2
# 0
## Keep Rows With At Least MIN_SUPPORT Nonzero Entries
nonzero_per_row = (X_masked > 0).sum(axis=1)
subreddit_mask = np.nonzero(nonzero_per_row >= MIN_SUPPORT)[0]
X_masked = X_masked[subreddit_mask]
rows_masked = [rows[idx] for idx in subreddit_mask]

## Optional BM25 Re-Weighting (Converted Back To CSR)
X_masked = bm25_weight(X_masked).tocsr() if BM25_WEIGHTING else X_masked

## Configure And Fit The Collaborative Filtering Model
model_params = {
    "factors": N_FACTORS,
    "regularization": REGULARIZATION,
    "iterations": ITERATIONS,
    "num_threads": NUM_THREADS,
    "random_state": RANDOM_STATE,
}
cf = CollaborativeFiltering(**model_params)
cf = cf.fit(X_masked, rows=rows_masked, columns=columns_masked)

#####################
### Testing
#####################

## Recommendations For A Sample User's Comment History
reddit = RedditData()
keith = reddit.retrieve_author_comments("HuskyKeith")
keith_counts = keith["subreddit"].tolist()
keith_recs = cf.recommend(keith_counts, 20)

## Similar-Item Lookup (Result Is Not Stored)
cf.get_similar_item("movies")

## Persist The Fitted Model
cf.dump("{}{}".format(MODEL_DIR, MODEL_NAME))