def window_diversity(window, n_topics, iters, thresh):
    """Compute per-user listening diversity for one time window.

    Parameters
    ----------
    window : pandas.DataFrame
        Long-format play data with at least "user", "artist" and "sum"
        (play-count) columns.
    n_topics : int
        Number of LDA topics.
    iters : int
        Maximum LDA iterations.
    thresh : int
        Minimum number of listener rows an artist needs to be kept.

    Returns
    -------
    pandas.DataFrame
        One "diversity" column indexed by user.
    """
    # Drop artists with fewer than `thresh` listener rows in this window.
    artist_count = window.groupby(["artist"])["user"].count().reset_index()
    window = window[window["artist"].isin(
        artist_count[artist_count["user"] >= thresh]["artist"])]

    # Pivot to a user x artist play-count matrix; absent pairs become 0.
    ui_matrix = window[["user", "artist", "sum"]].pivot(
        index="user", columns="artist", values="sum")
    users = ui_matrix.index
    ui_matrix = ui_matrix.fillna(0)

    # NOTE(review): `tf` and `idf` are not defined in this function —
    # presumably module-level globals computed elsewhere from ui_matrix;
    # confirm, otherwise this line raises NameError.
    ui_matrix_tfidf = np.multiply(tf, idf)

    # Fit the LDA model exactly once. The original called .fit() and then
    # .fit_transform() on the same estimator, which re-trained the model a
    # second time for no benefit (results were identical because of the
    # fixed random_state, but the work was doubled).
    ui_lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=iters,
                                       learning_method='online',
                                       learning_offset=50., random_state=0)
    ut_matrix = ui_lda.fit_transform(ui_matrix_tfidf)  # user x topic loadings
    ti_matrix = ui_lda.components_                     # topic x item weights

    # Entropy-style diversity per user over their topic distribution.
    diversity = hf.LDADiversity(ut_matrix)
    user_diversity = pd.DataFrame({"diversity": diversity}, index=users)
    return user_diversity
def get_recommendations(window_data, n_users=None, weights=None):
    """Build weighted artist recommendations for each user in a window.

    Parameters
    ----------
    window_data : dict
        Expects keys "it" (item x topic matrix), "ui" (user x item matrix),
        "users" (user labels), "artists" and "artist_sim" (artist labels and
        similarity matrix) — as read by the code below.
    n_users : int, optional
        How many users (from the start of ``window_data["users"]``) to
        process; defaults to all of them.
    weights : numpy.ndarray, optional
        Weights for the [similarity, diversity, clustering] recommenders;
        defaults to ``np.array([10, 1, 1])``.

    Returns
    -------
    dict
        user label -> weighted recommendation frame.
    """
    entropy = hf.LDADiversity(window_data["it"])
    # Artist popularity = number of users with at least one play.
    popularity = (window_data["ui"] > 0).sum(axis=0)
    users = window_data["users"]
    # BUGFIX: use `is None`, not `== None`. If a caller passed an ndarray
    # for `weights`, `weights == None` broadcasts elementwise and the `if`
    # raised "truth value of an array is ambiguous" (ValueError).
    if weights is None:
        weights = np.array([10, 1, 1])
    if n_users is None:
        n_users = len(users)
    r_weighted_all = {}
    for i in range(n_users):
        user = users[i]
        # Column indices of artists this user has actually played.
        current_artists_idx = np.where(window_data["ui"].loc[user] > 0)[0]
        sim_rec = top_sim(window_data["artists"], current_artists_idx,
                          window_data["artist_sim"])
        div_rec = top_div(window_data["artists"], current_artists_idx,
                          entropy, popularity.values)
        clust_rec = top_clustering(window_data["artists"], current_artists_idx,
                                   window_data["artist_sim"])
        # Join the three candidate lists on artist, then blend by weight.
        rec_sum = pd.merge(sim_rec, div_rec, on="artist")
        rec_sum = pd.merge(rec_sum, clust_rec, on="artist")
        r_weighted = weighted_recommendations(rec_sum, weights)
        r_weighted_all[user] = r_weighted
    return r_weighted_all
iu_matrix_tfidf = np.multiply(tf, idf) #transform item_user matrix to item_topic and topic_user matrix iu_lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=iters, learning_method='online', learning_offset=50., random_state=0).fit(iu_matrix_tfidf) tu_matrix = iu_lda.components_ it_matrix = iu_lda.fit_transform(iu_matrix_tfidf) user_sim = hf.LDASim(ut_matrix) item_sim = hf.LDASim(it_matrix) user_div = hf.LDADiversity(ut_matrix) hf.LDAClustering(item_sim, ui_matrix) hf.draw_graph(graph) #Clustering bipartite graph #forming bipartite graph W m, n = ui_matrix.shape axis_label = np.concatenate((ui_matrix.index, ui_matrix.columns)) W = np.block([[np.zeros((m, m)), ui_matrix], [ui_matrix.T, np.zeros((n, n))]]) D = np.diag(W.sum(axis=0)) W_scaled = np.linalg.inv(D)**(1 / 2) @ W @ np.linalg.inv(D)**(1 / 2) u, s, v = np.linalg.svd(W_scaled) x = np.linalg.inv(D)**(1 / 2) @ u[:, 1]
def track_diversity(window_data):
    """Map each user to their LDA-based diversity score.

    Parameters
    ----------
    window_data : dict
        Must contain "ut" (the user x topic matrix handed to
        ``hf.LDADiversity``) and "users" (labels aligned with the rows
        of "ut").

    Returns
    -------
    dict
        user label -> diversity value.
    """
    scores = hf.LDADiversity(window_data["ut"])
    return {user: score for user, score in zip(window_data["users"], scores)}