Example #1
0
def reorder_old_eurm(eurm):
    """
    Rebuild an old-ordered eurm so that its rows are grouped by category.

    ATTENTION: this function is intended to be used only for old eurms, which are
    ordered by test pids and not by categories.

    For each category from 1 to 10 (in that order) the rows of that category
    are selected through the indices given by the old-type Datareader, and the
    ten row groups are stacked vertically.

    :param eurm: the old-ordered eurm (indexed by rows with the indices
                 returned by ``get_test_pids_indices``)
    :return: eurm: the new-ordered eurm, as produced by ``sps.vstack``
    """
    # only_load is a flag: pass the boolean True rather than the string 'True',
    # as the other Datareader constructions in this file do.
    dr_old = Datareader(mode='online', only_load=True, type='old')

    # One row group per category, kept in ascending category order.
    rows_by_category = [
        eurm[dr_old.get_test_pids_indices(cat=cat)] for cat in range(1, 11)
    ]

    return sps.vstack(rows_by_category)
# Experiment script: load a pre-computed ensembled eurm, apply two boosting
# steps, turn the result into recommendation lists and evaluate them.
# ROOT_DIR, sim, dr, ev and the norm_* helpers are not defined in this snippet;
# they must already be in scope when it runs.

# Individual eurms that fed the ensemble (kept for reference, currently disabled).
# rp3b = sps.load_npz(ROOT_DIR + "/data/sub/EURM-rp3beta-online.npz")
# knn_c_i_al = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-album-top_k=850-sm_type=cosine-shrink=100.npz")
# knn_c_i_ar = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-artist-top_k=850-sm_type=cosine-shrink=100.npz")
# NOTE(review): this file name says "offline" while the disabled ones say
# "online" -- confirm this is the intended matrix.
nlp = sps.load_npz(ROOT_DIR + "/data/eurm_nlp_offline.npz")
# cf_u = sps.load_npz(ROOT_DIR + "/data/sub/eurm_cfu_online.npz")

# The ensemble is loaded from disk instead of being recomputed by the
# disabled ensembler call below.
eurm_ens = sps.load_npz(ROOT_DIR + "/data/ENSEMBLED.npz")

#matrix = [rp3b, knn_c_i_ar, knn_c_i_al, nlp, cf_u]

#eurm_ens = ensembler(matrix, [0.720, 0.113, 0.177, 0.194, 1.0], normalization_type="max")

# HOLEBOOST
# Boost is requested for categories 8 and 10 only; the result replaces eurm_ens.
hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row)
eurm_ens = hb.boost_eurm(categories=[8, 10], k=300, gamma=5)

# NINEBOOST
# Implemented by TailBoost; it runs on the already hole-boosted eurm, so the
# order of these two steps matters.
nb = TailBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l2_row)
eurm_ens = nb.boost_eurm(last_tracks=10, k=100, gamma=0.01)

# Recommendation lists from the boosted ensemble and from the NLP eurm alone.
rec_list = eurm_to_recommendation_list(eurm_ens)
rec_list_nlp = eurm_to_recommendation_list(nlp)

# For the test playlists of category 1, overwrite the ensemble recommendations
# with the NLP-only ones.
indices = dr.get_test_pids_indices(cat=1)
for i in indices:
    rec_list[i] = rec_list_nlp[i]

# EVALUATION
ev.evaluate(rec_list, name='ens_with_cfu_nineboosted', show_plot=False)
from utils.datareader import Datareader
import scipy.sparse as sps

# Sanity-check script: compares a matrix stored with the old ordering against
# one stored with the new ordering, category by category.
old = sps.load_npz("online-test-old.npz")
new = sps.load_npz("online-test-new.npz")

# Both readers share the same options; only the old one is given type="old".
reader_options = dict(verbose=False, mode='online', only_load=True)
dr_old = Datareader(type="old", **reader_options)
dr_new = Datareader(**reader_options)

#### check pids ####

# Print, for every category, the test pid indices of the new reader and then
# those of the old reader.
for i in range(1, 11):
    for label, reader in (("new", dr_new), ("old", dr_old)):
        print("indices {} cat{}: {!s}".format(
            label, i, reader.get_test_pids_indices(cat=i)))

#### check split correctness ####

# Rows selected with the old indices from the old matrix must equal the rows
# selected with the new indices from the new matrix.
for i in range(1, 11):
    indices_old = dr_old.get_test_pids_indices(cat=i)
    indices_new = dr_new.get_test_pids_indices(cat=i)
    print(indices_old)
    # A difference with no stored non-zero entries means the two selections match.
    res = old[indices_old] - new[indices_new]
    assert res.nnz == 0
    print("Split cat{}: ok".format(i))

#### check ensembling operations after split ####

res = []
for i in range(1, 11):
    indices_old = dr_old.get_test_pids_indices(cat=i)
Example #4
0
                  lanca=lanca,
                  lanca2=lanca2)

        pids = list(dr.get_train_pids()) + list(dr.get_test_pids())
        test_playlists = dr.get_test_pids()

        ucm = nlp.get_UCM(data1=data1)

        dr_old = Datareader(mode='online', only_load='True', type='old')

        train = ucm[:1000000]
        test = ucm[1000000:]

        test_indices = []
        for cat in range(1, 11):
            indices = dr_old.get_test_pids_indices(cat=cat)
            test_indices.extend(indices)

        new_indices = list(
            dr.get_train_pids()) + list(np.array(test_indices) + 1000000)
        ucm = ucm[new_indices]

        urm = dr.get_urm()
        urm = urm[pids]

        ucm = bm25_row(ucm)

        similarity = tversky_similarity(ucm,
                                        binary=False,
                                        shrink=1,
                                        alpha=0.1,