Exemple #1
0
class Top_pop_p(object):
    """Personalized top-popularity scores for category-2 test playlists.

    Each category-2 test playlist is represented by the first track stored
    in its URM row (the "seed"). A set of playlists related to that seed is
    selected, their URM rows are summed into per-track counts, and the
    largest counts become the playlist's scores.

    The code indexes the URM through ``indices``/``indptr`` as a row-major
    structure, so it assumes ``get_urm()`` returns a CSR matrix.
    """

    # Number of highest-count tracks written per playlist.
    _N_CANDIDATES = 501

    def __init__(self):
        # NOTE(review): `dr_of` is also built with mode='online' although its
        # name suggests an offline reader. The methods mix row ids from
        # `dr_on` with matrices from `dr_of`, which only lines up when both
        # readers describe the same data, so the mode is left unchanged.
        self.dr_on = Datareader(verbose=False, mode='online', only_load=True)
        self.dr_of = Datareader(verbose=False, mode='online', only_load=True)
        self.urm_on = self.dr_on.get_urm()
        # Read from `dr_of` (the original read `dr_on` twice).
        self.urm_of = self.dr_of.get_urm()
        # Column-major copy: lets track() list the playlists holding a track.
        self.urm_col = sps.csc_matrix(self.urm_of)
        self.top_p = np.zeros(self.urm_of.shape[1])

    def _build_eurm(self, playlists_for_seed):
        """Score every category-2 test playlist and return the test-row matrix.

        :param playlists_for_seed: callable mapping a seed track index to an
            array of playlist (row) indices whose tracks should be counted.
        :return: the rows of the score matrix selected by all test pids,
            after being passed through ``eurm_remove_seed``.
        """
        eurm = sps.lil_matrix(self.urm_of.shape)
        pids = self.dr_on.get_test_pids(cat=2)
        pids_all = self.dr_of.get_test_pids()
        n_top = self._N_CANDIDATES

        for row in tqdm(pids):
            # The seed is the first column index stored for this playlist.
            # An empty row raises IndexError here, as it did originally.
            seed = self.urm_on.indices[self.urm_on.indptr[row]:self.urm_on.indptr[row + 1]][0]

            playlists = playlists_for_seed(seed)

            # Per-track occurrence counts across the selected playlists.
            top = self.urm_of[playlists].sum(axis=0).A1.astype(np.int32)
            # Indices of the n_top largest counts, largest first.
            track_ind_rec = top.argsort()[-n_top:][::-1]

            eurm[row, track_ind_rec] = top[track_ind_rec]

        eurm = eurm.tocsr()[pids_all]
        return eurm_remove_seed(eurm, self.dr_on)

    def album(self):
        """Score tracks by popularity among playlists sharing the seed's album.

        :return: score matrix restricted to the test playlists.
        """
        ucm_album = self.dr_of.get_ucm_albums().tocsc()
        album_dic = self.dr_of.get_track_to_album_dict()

        def playlists_with_album(track_ind):
            # Rows of the album UCM that have an entry in the album's column.
            album = album_dic[track_ind]
            return ucm_album.indices[ucm_album.indptr[album]:ucm_album.indptr[album + 1]]

        return self._build_eurm(playlists_with_album)

    def track(self):
        """Score tracks by popularity among playlists containing the seed track.

        :return: score matrix restricted to the test playlists.
        """
        urm_col = self.urm_col

        def playlists_with_track(track_ind):
            # Rows of the URM that have an entry in the seed track's column.
            return urm_col.indices[urm_col.indptr[track_ind]:urm_col.indptr[track_ind + 1]]

        return self._build_eurm(playlists_with_track)
Exemple #2
0
# Script fragment: prepares the matrices and lookup tables for a
# "top popular" computation over the category-2 test playlists.
# `dr` is not defined in the visible lines; it must be created earlier.
urm = dr.get_urm()
# Column-major copy of the URM.
urm_col = sps.csc_matrix(urm)
top_p = np.zeros(urm.shape[1])
rec = []
# Two empty score matrices with the same shape as the URM.
eurm1 = sps.lil_matrix(urm.shape)
eurm2 = sps.lil_matrix(urm.shape)
print(eurm1.shape)
# Category-2 test playlists, and the full list of test playlists.
pids = dr.get_test_pids(cat=2)
pids_all = dr.get_test_pids()

# TopPop Album (disabled variant, kept for reference)
# ucm_album = dr.get_ucm_albums().tocsc()
# album_dic = dr.get_track_to_album_dict()

# TopPop Artist
# NOTE(review): the matrix still comes from get_ucm_albums() and keeps the
# name `ucm_album`, while the dictionary maps tracks to artists. Confirm
# whether an artist UCM was intended here.
ucm_album = dr.get_ucm_albums().tocsc()
artists_dic = dr.get_track_to_artist_dict()

album_to_tracks = load_obj(name="album_tracks_dict_offline",
                           path=ROOT_DIR + "/boosts/")
# NOTE(review): `tracks_to_album` is loaded from the object named
# "artist_tracks_dict_offline"; the variable name and the stored name
# disagree. Verify which one is correct.
tracks_to_album = load_obj(name="artist_tracks_dict_offline",
                           path=ROOT_DIR + "/boosts/")

for row in tqdm(pids, desc="part1"):
    # First column index stored in this playlist's URM row (the seed track).
    track_ind = urm.indices[urm.indptr[row]:urm.indptr[row + 1]][0]

    # Look up the seed's entry in `artists_dic` and use it as a column of
    # `ucm_album` to collect the row indices stored in that column.
    # NOTE(review): `album` actually holds a value from the track-to-artist
    # dictionary but indexes the album UCM; confirm the ids are compatible.
    album = artists_dic[track_ind]
    playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album.
                                  indptr[album + 1]]
            sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm)

        # Evaluation
        ev.evaluate(recommendation_list=eurm_to_recommendation_list(
            eurm, datareader=dr),
                    name=complete_name)

    elif mode == "online":
        # Initialization
        dr = Datareader(verbose=False, mode=mode, only_load=True)
        test_pids = list(dr.get_test_pids())
        sb = Submitter(dr)
        urm = dr.get_urm()

        # UCM
        ucm_artists = dr.get_ucm_albums()
        ucm_artists = bm25_row(ucm_artists)

        # Do not train on challenge set
        ucm_artists_T = ucm_artists.copy()
        inplace_set_rows_zero(ucm_artists_T, test_pids)
        ucm_artists_T = ucm_artists_T.T

        # Similarity
        print('Similarity..')
        sim = tversky_similarity(ucm_artists,
                                 ucm_artists_T,
                                 shrink=200,
                                 target_items=test_pids,
                                 alpha=0.1,
                                 beta=1,
Exemple #4
0
    # Run configuration for this recommender. `mode` is not defined in the
    # visible lines and must come from the enclosing scope.
    name = "cbf_user_albums"
    # Passed as `k` to the similarity computation below.
    knn = 800
    # Only used in `complete_name` within the visible lines.
    topk = 750
    # Not read within the visible lines.
    save_eurm = True
    # Label combining mode, recommender name and both hyper-parameters.
    complete_name = mode + "_" + name + "_knn=" + str(knn) + "_topk=" + str(
        topk)

    if mode == "offline":
        # Initialization: data reader, test playlist ids, evaluator and URM.
        dr = Datareader(verbose=False, mode=mode, only_load=True)
        test_pids = list(dr.get_test_pids())
        ev = Evaluator(dr)
        urm = dr.get_urm()

        # UCM: album user-content matrix, reweighted by bm25_row.
        ucm_albums = dr.get_ucm_albums()
        ucm_albums = bm25_row(ucm_albums)

        # Similarity between the UCM and its transpose, computed only for the
        # test playlists (target_items) and then converted to CSR.
        print('Similarity..')
        sim = tversky_similarity(ucm_albums,
                                 ucm_albums.T,
                                 shrink=200,
                                 target_items=test_pids,
                                 alpha=0.1,
                                 beta=1,
                                 k=knn,
                                 verbose=1,
                                 binary=False)
        sim = sim.tocsr()
Exemple #5
0
from utils.evaluator import Evaluator
from utils.datareader import Datareader
from utils.post_processing import *
from tqdm import tqdm
from scipy import sparse
import utils.sparse as ut
import pandas as pd
import numpy as np
import sys


# Offline pipeline: load the data, build an album-based playlist similarity
# and multiply it with the URM to obtain the estimated ratings.
datareader = Datareader(verbose=False, only_load=True, mode='offline')
evaluator = Evaluator(datareader)

urm = datareader.get_urm()
ucm_album = datareader.get_ucm_albums()

# Column sums of the album UCM; `mask` holds the indices of the 100 largest,
# largest first. They are handed to inplace_set_cols_zero (presumably zeroing
# those columns in place - confirm against utils.sparse).
albums_pop = ucm_album.sum(axis=0).A1
mask = np.argsort(albums_pop)[:-101:-1]
ut.inplace_set_cols_zero(ucm_album, mask)

ucm_album = bm25_row(ucm_album)

print('Similarity..')
sim = tversky_similarity(
    ucm_album,
    ucm_album.T,
    shrink=200,
    alpha=0.1,
    beta=1,
    k=800,
    verbose=1,
    binary=False,
).tocsr()

test_pids = list(datareader.get_test_pids())

# Estimated ratings, converted to CSR.
eurm = dot_product(sim, urm, k=750).tocsr()
class Top_pop_p(object):
    '''
    Personalized top-popularity scores for the category-2 test playlists,
    built from either the seed track's album or the seed track itself.

    The seed of a playlist is the first column index stored in its URM row.
    The URM is indexed through ``indices``/``indptr`` by row, so the code
    assumes ``get_urm()`` returns a CSR matrix.
    '''

    # Number of highest-count tracks written per playlist.
    _N_CANDIDATES = 501

    def __init__(self):
        # Nothing to prepare: data is loaded on every get_top_pop_* call.
        pass

    def _load_mode(self, mode):
        '''
        Load the data reader and URM for ``mode`` and store them on the
        instance under the same attribute names the class has always used.

        :param mode: "online" or "offline"
        :return: tuple (datareader, urm)
        :raises ValueError: if ``mode`` is neither "online" nor "offline"
        '''
        # Validate before touching any data so a typo fails fast and clearly
        # (it used to surface as an UnboundLocalError at the return statement).
        if mode not in ("online", "offline"):
            raise ValueError(
                "mode must be 'online' or 'offline', got %r" % (mode,))

        dr = Datareader(verbose=False, mode=mode, only_load=True)
        urm = dr.get_urm()

        if mode == "online":
            self.dr_on = dr
            self.urm_on = urm
        else:
            self.dr_of = dr
            self.urm_of = urm

        # Column-major copy: lists the playlists that contain a given track.
        self.urm_col = sps.csc_matrix(urm)
        self.top_p = np.zeros(urm.shape[1])
        return dr, urm

    def _build_eurm(self, dr, urm, playlists_for_seed):
        '''
        Score every category-2 test playlist.

        :param dr: data reader providing the test playlist ids
        :param urm: URM used both to find the seed and to count tracks
        :param playlists_for_seed: callable mapping a seed track index to an
            array of playlist (row) indices whose tracks are counted
        :return: csr_matrix with one row per test playlist
        '''
        eurm = sps.lil_matrix(urm.shape)
        pids = dr.get_test_pids(cat=2)
        pids_all = dr.get_test_pids()
        n_top = self._N_CANDIDATES

        for row in tqdm(pids):
            # First column index stored for this playlist (the seed track).
            seed = urm.indices[urm.indptr[row]:urm.indptr[row + 1]][0]

            playlists = playlists_for_seed(seed)

            # Per-track occurrence counts across the selected playlists.
            top = urm[playlists].sum(axis=0).A1.astype(np.int32)
            # Indices of the n_top largest counts, largest first.
            track_ind_rec = top.argsort()[-n_top:][::-1]

            eurm[row, track_ind_rec] = top[track_ind_rec]

        eurm = eurm.tocsr()[pids_all]
        return eurm_remove_seed(eurm, dr).tocsr()

    def get_top_pop_album(self, mode):
        '''
        :param mode: "online" or "offline"
        :return: csr_matrix filled with the recommendations for the cat 2
            playlists, counting tracks in playlists that share the seed's album
        :raises ValueError: if ``mode`` is not a supported value
        '''
        dr, urm = self._load_mode(mode)
        ucm_album = dr.get_ucm_albums().tocsc()
        album_dic = dr.get_track_to_album_dict()

        def playlists_with_album(track_ind):
            # Rows of the album UCM that have an entry in the album's column.
            album = album_dic[track_ind]
            return ucm_album.indices[ucm_album.indptr[album]:ucm_album.indptr[album + 1]]

        return self._build_eurm(dr, urm, playlists_with_album)

    def get_top_pop_track(self, mode):
        '''
        :param mode: "online" or "offline"
        :return: csr_matrix filled with the recommendations for the cat 2
            playlists, counting tracks in playlists that contain the seed track
        :raises ValueError: if ``mode`` is not a supported value
        '''
        dr, urm = self._load_mode(mode)
        urm_col = self.urm_col

        def playlists_with_track(track_ind):
            # Rows of the URM that have an entry in the seed track's column.
            return urm_col.indices[urm_col.indptr[track_ind]:urm_col.indptr[track_ind + 1]]

        return self._build_eurm(dr, urm, playlists_with_track)