# Example 1
def window_diversity(window, n_topics, iters, thresh):
    """Compute a per-user listening-diversity score for one time window.

    Filters out rarely-heard artists, pivots the window into a
    user x artist matrix, fits an LDA topic model on its TF-IDF weighting,
    and scores each user's topic distribution with ``hf.LDADiversity``.

    Parameters
    ----------
    window : pandas.DataFrame
        Long-format play data with "user", "artist" and "sum" columns.
    n_topics : int
        Number of LDA topics.
    iters : int
        Maximum LDA iterations.
    thresh : int
        Minimum number of user rows an artist needs to be kept.

    Returns
    -------
    pandas.DataFrame
        Single "diversity" column indexed by user id.
    """
    # Keep only artists that appear in at least `thresh` user rows.
    artist_count = window.groupby(["artist"])["user"].count().reset_index()
    kept_artists = artist_count[artist_count["user"] >= thresh]["artist"]
    window = window[window["artist"].isin(kept_artists)]

    # User x artist play matrix; unseen pairs become 0.
    ui_matrix = (window[["user", "artist", "sum"]]
                 .pivot(index="user", columns="artist", values="sum"))
    users = ui_matrix.index
    ui_matrix = ui_matrix.fillna(0)

    # NOTE(review): `tf` and `idf` are not defined in this scope -- this line
    # raises NameError unless they exist as globals. Presumably they are the
    # term-frequency and inverse-document-frequency factors derived from
    # ui_matrix; confirm against the caller/upstream cell.
    ui_matrix_tfidf = np.multiply(tf, idf)

    # NOTE(review): `n_topics=` was renamed `n_components=` in scikit-learn
    # 0.19 and removed in 0.21; update the keyword on modern sklearn.
    ui_lda = LatentDirichletAllocation(n_topics=n_topics,
                                       max_iter=iters,
                                       learning_method='online',
                                       learning_offset=50.,
                                       random_state=0).fit(ui_matrix_tfidf)

    # Fix: the original called fit_transform() here, refitting the identical
    # model (same data, random_state=0) a second time; transform() reuses the
    # fit above and yields the same user-topic matrix. The unused
    # `ti_matrix = ui_lda.components_` local was also dropped.
    ut_matrix = ui_lda.transform(ui_matrix_tfidf)

    diversity = hf.LDADiversity(ut_matrix)
    return pd.DataFrame({"diversity": diversity}, index=users)
# Example 2
def get_recommendations(window_data, n_users=None, weights=None):
    """Build weighted artist recommendations for each user in a window.

    For every user, combines three candidate lists -- similarity-based,
    diversity-based, and cluster-based -- and scores them with
    ``weighted_recommendations``.

    Parameters
    ----------
    window_data : dict
        Expected keys (from usage below): "it" (item-topic matrix),
        "ui" (user-item DataFrame), "users", "artists", "artist_sim".
    n_users : int, optional
        How many users (from the front of "users") to process; all by default.
    weights : numpy.ndarray, optional
        Weights for the three recommenders; defaults to [10, 1, 1].

    Returns
    -------
    dict
        Maps each processed user id to their weighted recommendation frame.
    """
    entropy = hf.LDADiversity(window_data["it"])
    # Artist popularity = number of users with at least one play.
    popularity = (window_data["ui"] > 0).sum(axis=0)
    users = window_data["users"]

    # Fix: compare with `is None`, not `== None`. If a caller passes a numpy
    # array for `weights`, `weights == None` is an elementwise comparison and
    # using it in `if` raises "truth value of an array is ambiguous".
    if weights is None:
        weights = np.array([10, 1, 1])
    if n_users is None:
        n_users = len(users)

    r_weighted_all = {}
    for i in range(n_users):
        user = users[i]
        # Column positions of artists this user has already played.
        current_artists_idx = np.where(window_data["ui"].loc[user] > 0)[0]
        sim_rec = top_sim(window_data["artists"], current_artists_idx,
                          window_data["artist_sim"])
        div_rec = top_div(window_data["artists"], current_artists_idx, entropy,
                          popularity.values)
        clust_rec = top_clustering(window_data["artists"], current_artists_idx,
                                   window_data["artist_sim"])
        # Inner-join the three candidate lists on artist, then score.
        rec_sum = pd.merge(sim_rec, div_rec, on="artist")
        rec_sum = pd.merge(rec_sum, clust_rec, on="artist")
        r_weighted_all[user] = weighted_recommendations(rec_sum, weights)
    return r_weighted_all
# Example 3
# Script fragment: fit LDA on an item-user TF-IDF matrix, derive similarity /
# diversity scores, then spectrally co-cluster the user-item bipartite graph.
# NOTE(review): `tf`, `idf`, `n_topics`, `iters`, `ui_matrix` and `graph` are
# not defined anywhere in this fragment -- they must come from earlier cells.
iu_matrix_tfidf = np.multiply(tf, idf)

#transform item_user matrix to item_topic and topic_user matrix
# NOTE(review): `n_topics=` was removed from scikit-learn in 0.21
# (renamed `n_components=`); this call fails on modern sklearn.
iu_lda = LatentDirichletAllocation(n_topics=n_topics,
                                   max_iter=iters,
                                   learning_method='online',
                                   learning_offset=50.,
                                   random_state=0).fit(iu_matrix_tfidf)

tu_matrix = iu_lda.components_
# NOTE(review): fit_transform() refits the already-fitted model; with the
# fixed random_state the result is the same, but transform() would avoid
# the duplicate work.
it_matrix = iu_lda.fit_transform(iu_matrix_tfidf)

# NOTE(review): `ut_matrix` is never assigned here -- the line above assigns
# `tu_matrix`. As written, the next two uses raise NameError; presumably
# `tu_matrix` (or its transpose) was intended. Verify against Example 1.
user_sim = hf.LDASim(ut_matrix)
item_sim = hf.LDASim(it_matrix)

user_div = hf.LDADiversity(ut_matrix)

hf.LDAClustering(item_sim, ui_matrix)

# NOTE(review): `graph` is undefined in this fragment.
hf.draw_graph(graph)

#Clustering bipartite graph
#forming bipartite graph W
# W is the (m+n) x (m+n) adjacency matrix of the user-artist bipartite graph:
# zeros on the user-user and artist-artist diagonal blocks, plays off-diagonal.
m, n = ui_matrix.shape
axis_label = np.concatenate((ui_matrix.index, ui_matrix.columns))
W = np.block([[np.zeros((m, m)), ui_matrix], [ui_matrix.T, np.zeros((n, n))]])
# D = diagonal degree matrix of W.
D = np.diag(W.sum(axis=0))
# Normalized affinity D^(-1/2) W D^(-1/2). The elementwise power `** (1/2)`
# equals the matrix square root only because inv(D) is diagonal; a zero-degree
# row/column would make inv(D) fail (singular matrix).
W_scaled = np.linalg.inv(D)**(1 / 2) @ W @ np.linalg.inv(D)**(1 / 2)

# Second left singular vector gives the spectral bipartition (Fiedler-style);
# rescale back by D^(-1/2) to obtain the partition indicator x.
u, s, v = np.linalg.svd(W_scaled)
x = np.linalg.inv(D)**(1 / 2) @ u[:, 1]
# Example 4
def track_diversity(window_data):
    """Map each user in the window to their LDA diversity score.

    Scores the user-topic matrix ``window_data["ut"]`` with
    ``hf.LDADiversity`` and labels the result with ``window_data["users"]``.
    Returns a ``{user: diversity}`` dict.
    """
    scores = hf.LDADiversity(window_data["ut"])
    return dict(zip(window_data["users"], scores))