Ejemplo n.º 1
0
def icm():
    """Evaluate offline recommendations derived from the NLP item-content matrix.

    Steps, in order:
      1. Load the offline ``Datareader`` and build an ``Evaluator`` on it.
      2. Build the ICM with ``NLP(..., mode='tracks')`` and pass it through
         ``bm25_row``.
      3. Compute a Tversky similarity over the ICM (shrink=200, alpha=0.1,
         beta=1) and convert it to CSR.
      4. Multiply the test-playlist rows of the URM by that similarity
         (``k=500``) to obtain the estimated URM.
      5. Hand the resulting recommendation list to the evaluator under the
         name ``'nlp_enriched'``.

    Returns:
        None. Results are reported by ``Evaluator.evaluate``.
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()

    # An empty stopword list is passed on purpose (STOP_WORDS is not used here).
    nlp = NLP(datareader=datareader, stopwords=[], mode='tracks')
    print('Getting ucm and icm...')
    # Named item_content so the local does not shadow this function's own name.
    item_content = bm25_row(nlp.get_icm())

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(item_content, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    urm = datareader.get_urm()

    print('Computing eurm...')
    # Only the rows of the test playlists are multiplied by the similarity.
    eurm_nlp = dot_product(urm[test_playlists, :], similarity, k=500)
    eurm_nlp = eurm_nlp.tocsr()

    # To persist the estimated URM instead of (or before) evaluating it:
    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_enriched')
Ejemplo n.º 2
0
def online():
    """Build the NLP-based estimated URM for the online split and save it.

    Steps, in order:
      1. Load the online ``Datareader``.
      2. Build the UCM with ``NLP`` (empty stopword list).
      3. Compute a Tversky similarity over the UCM (shrink=200, alpha=0.1,
         beta=1) and convert it to CSR.
      4. Multiply the similarity by the first element returned by
         ``datareader.get_urm_shrinked()`` (``k=500``).
      5. Keep the last 10000 rows and write them to
         ``ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz'``.

    Returns:
        None. The result is written to disk with ``sparse.save_npz``.
    """
    datareader = Datareader(mode='online', only_load=True)

    print('NLP...')

    # An empty stopword list is passed on purpose (STOP_WORDS is not used here).
    nlp = NLP(datareader, stopwords=[])
    ucm = nlp.get_ucm()
    # Optional re-weighting steps, currently disabled:
    # ucm = bm25_row(ucm)
    # inplace_csr_column_scale(ucm, np.array(TOKEN_WEIGHTS))

    urm = datareader.get_urm_shrinked()[0]

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)
    # Keep only the last 10000 rows.
    # NOTE(review): presumably these are the test playlists - confirm.
    eurm_nlp = eurm_nlp[-10000:, :]

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz', eurm_nlp)
Ejemplo n.º 3
0
    def fitnessFunction(self, individual):
        """Score one individual by the recommendations its column weights yield.

        The individual is converted to a numpy array and used to scale the
        columns of a float64 copy of ``self.UCM``. A Tversky similarity is
        computed on the scaled matrix (restricted via ``target_items`` to
        ``self.test_playlists_indices``), multiplied by ``self.URM_train``
        and sliced to the test playlists. The score is the mean of the
        track-level ``'prec'`` metric for categories 1 and 2.

        Side effects: increments ``self.current`` and prints progress
        (timings only when ``self.verbose`` is set).

        Args:
            individual: sequence with one weight per UCM column.

        Returns:
            A one-element tuple ``(score,)``.
            NOTE(review): presumably the tuple form is required by the
            genetic-algorithm framework calling this - confirm.
        """
        # Work on an array so the weights can be used for column scaling.
        weights = np.array(individual)

        # Scale each column of a float64 copy of the UCM by its weight.
        if self.verbose:
            print('Filtering UCM...')
        tic = time.time()
        scaled_ucm = self.UCM.copy().astype(np.float64)
        inplace_csr_column_scale(scaled_ucm, weights)
        if self.verbose:
            print('UCM filtered in', time.time() - tic, 'sec')

        # Similarity on the scaled UCM.
        if self.verbose:
            print('Computing similarity...')
        tic = time.time()
        sim = tversky_similarity(
            scaled_ucm,
            shrink=200,
            alpha=0.1,
            beta=1,
            target_items=self.test_playlists_indices,
            binary=False,
        ).tocsr()
        if self.verbose:
            print('Similarity computed in', time.time() - tic, 'sec')

        # Estimated URM: similarity times the training URM.
        if self.verbose:
            print('Computing eurm...')
        tic = time.time()
        estimated = dot_product(sim, self.URM_train, k=500)
        if self.verbose:
            print('eurm computed in', time.time() - tic, 'sec')
            print('Converting eurm in csr...')
        tic = time.time()
        estimated = estimated.tocsr()[self.test_playlists_indices, :]
        if self.verbose:
            print('eurm converted in', time.time() - tic, 'sec')

        # Evaluate precision on categories 1 and 2 and average them.
        recommendations = eurm_to_recommendation_list(estimated)
        print('current', self.current)

        per_cat = [
            self.evaluator.evaluate_single_metric(recommendations, name='Genetic',
                                                  metric='prec', level='track',
                                                  cat=category, verbose=False)
            for category in (1, 2)
        ]
        score = (per_cat[0] + per_cat[1]) / 2

        self.current += 1

        if self.verbose:
            print(score)

        print("Numfeatures {}".format(np.sum(weights)))
        print('\n')

        return (score,)
    def get_similarity_from_icm(self):
        """Compute, cache and return the Tversky similarity of ``self.icm``.

        ``self.get_icm()`` is called first and its return value is ignored.
        NOTE(review): presumably it populates ``self.icm`` - confirm.

        Returns:
            The similarity in CSR format, also stored in
            ``self.similarity_icm``.
        """
        self.get_icm()

        if self.verbose:
            print('Computing similarity from icm...')

        raw_similarity = tversky_similarity(self.icm, shrink=200, alpha=0.1, beta=1)
        self.similarity_icm = raw_similarity.tocsr()
        return self.similarity_icm
Ejemplo n.º 5
0
def prova():
    """Print the artist-to-tracks dictionary of the offline dataset and exit.

    As written, only the first three statements run: the first ``exit()``
    raises ``SystemExit``, so everything after it is unreachable. The
    unreachable remainder contains two further stages (see the inline
    comments), the second of which is itself behind another ``exit()``.
    """

    dr = Datareader(mode='offline', only_load=True)
    print(dr.get_artist_to_tracks_dict())
    # NOTE(review): exit() raises SystemExit here; nothing below is executed.
    exit()

    # ---- Unreachable while the exit() above is present ----
    # Stage 1: build the playlist UCM and multiply it by a stored
    # user-similarity matrix, then report the number of non-zeros.
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    test_playlists = dr.get_test_pids()

    # NOTE(review): stopwords and token_weights are assigned but never used
    # below (STOP_WORDS is passed to NLP directly; the scaling is commented out).
    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)

    nlp = NLP(mode='playlists', datareader=dr, stopwords=STOP_WORDS)
    s = nlp.get_ucm()
    print(s.shape)

    evaluator = Evaluator(dr)

    # NOTE(review): get_ucm() is called a second time; `s` above is only
    # used to print the shape.
    ucm = nlp.get_ucm()
    sim = sparse.load_npz(ROOT_DIR + '/data/cf_user_similarity.npz')

    print('Computing dot...')
    ucm = dot_product(sim, ucm, k=200)
    print('NNZ', ucm.nnz)
    # NOTE(review): second exit(); stage 2 below would not run even if the
    # first exit() were removed.
    exit()

    # ---- Stage 2: similarity, estimated URM and evaluation ----
    urm = dr.get_urm()

    # ucm = ucm.astype(np.float64)
    # inplace_csr_column_scale(ucm, token_weights)

    print('Computing similarity...')
    start = time.time()
    # Compute similarity
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    # NOTE(review): this timer is started but its elapsed time is never printed.
    start = time.time()
    # Compute eurm
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    # Keep only the rows indexed by the test playlist ids.
    eurm_nlp = eurm_nlp[test_playlists, :]

    #sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_enriched')
# Build the NLP2 helper with an empty stopword list.
# NOTE(review): dr, norm, work, split, date, skip_words, porter, porter2,
# lanca, lanca2 (and data1 below) are not defined in this section; they must
# be bound earlier in the file.
nlp = NLP2(dr, stopwords=[], norm=norm,work=work,split=split,date=date, skip_words=skip_words,
           porter=porter,porter2=porter2,lanca=lanca,lanca2=lanca2)
# new_titles, occ_full, occ_single = nlp.fit( verbose=False, workout=True, normalize=True, date=True, lancaster=False,
#                                                     porter=False, underscore=True, double_fit=False)


# Fetch the UCM, the URM and the ids of the test playlists.
ucm = nlp.get_UCM(data1=data1)
urm = dr.get_urm()
test_playlists = dr.get_test_pids()
print('ucm', ucm.shape)
print('Computing similarity...')
start = time.time()
# Apply bm25_row to the UCM, then compute the Tversky similarity on it;
# the elapsed time printed below covers both steps.
ucm= bm25_row(ucm)

similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(time.time() - start)


print('Computing eurm...')
start = time.time()
# Compute the estimated URM (similarity times URM, k=500), convert it to CSR
# and keep only the rows indexed by the test playlist ids.
eurm = dot_product(similarity, urm, k=500)
eurm = eurm.tocsr()
eurm = eurm[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)


# Evaluating