예제 #1
0
파일: CFW.py 프로젝트: keyblade95/RecSys
    def validate(self,
                 user_ids=None,
                 log_path=None,
                 normalize_similarity=(False,),
                 damp_coeff=(1,),
                 add_zeros_quota=(1,),
                 loss_tolerance=(1e-6,),
                 iteration_limit=(30,),
                 use_incremental=(False,)):
        """
        Grid-search the CFW hyper-parameters: fit and evaluate the model on
        every combination of the given parameter iterables.

        Parameters
        ----------
        user_ids : list, users to recommend to. If None, use d.get_target_playlists().
        log_path : str, folder where stdout is redirected to a timestamped
            .txt file; None to keep printing to the console.
        normalize_similarity, damp_coeff, add_zeros_quota, loss_tolerance,
        iteration_limit, use_incremental : iterables of values to try
            (full cartesian product).
        """
        # Resolve the default lazily: calling d.get_target_playlists() in the
        # signature would evaluate it once at import time.
        if user_ids is None:
            user_ids = d.get_target_playlists()

        if log_path is not None:
            orig_stdout = sys.stdout
            f = open(
                log_path + '/' + self.name + ' ' + time.strftime('_%H-%M-%S') +
                ' ' + time.strftime('%d-%m-%Y') + '.txt', 'w')
            sys.stdout = f

        try:
            for ns in normalize_similarity:
                for dc in damp_coeff:
                    for adq in add_zeros_quota:
                        for lt in loss_tolerance:
                            for il in iteration_limit:
                                for ui in use_incremental:
                                    # bug fix: add_zeros_quota was printed as
                                    # 'dc' (the damp coefficient) instead of
                                    # the actual value 'adq'
                                    print(
                                        self._print(normalize_similarity=ns,
                                                    add_zeros_quota=adq,
                                                    loss_tolerance=lt,
                                                    iteration_limit=il,
                                                    damp_coeff=dc,
                                                    use_incremental=ui))
                                    self.fit(ICM=d.get_icm(),
                                             URM_train=d.get_urm_train_1(),
                                             normalize_similarity=ns,
                                             add_zeros_quota=adq,
                                             loss_tolerance=lt,
                                             iteration_limit=il,
                                             damp_coeff=dc,
                                             use_incremental=ui)

                                    recs = self.recommend_batch(
                                        user_ids, urm=d.get_urm_train_1())
                                    r.evaluate(recs, d.get_urm_test_1())
        finally:
            # restore stdout (and close the log file) even when a run fails
            if log_path is not None:
                sys.stdout = orig_stdout
                f.close()
예제 #2
0
 def __init__(self, h):
     """
     Build the sequential recommender and cache its sequence dataset.

     h: (int), length of the sequences
     """
     super(SequentialRecommender, self).__init__()
     self.name = 'sequential'
     self.h = h
     
     # build sequences dataset and cache it
     self.sequences, self.target_indices = ps.get_sequences(h=h)
     # keep only the first data.N_SEQUENTIAL target playlists — presumably
     # those are the sequential ones; confirm against the data module
     target_ids = data.get_target_playlists()[0:data.N_SEQUENTIAL]
     self.target_ids = np.array(target_ids)
     # (row, col) indices of interactions already in the train URM for the
     # targets — NOTE(review): likely used to filter seen items; confirm
     self.already_liked_indices = (data.get_urm_train_1()[target_ids]).nonzero()
     self.H = seqsim.getH(self.sequences)
예제 #3
0
    def run(self, urm_train=None, urm=None, urm_test=None, targetids=None,
            factors=100, regularization=0.01, iterations=100, alpha=25, with_scores=False, export=True, verbose=True):
        """
        Run the model and export the results to a file

        Parameters
        ----------
        urm_train : csr matrix, URM used to fit the model. If None, use
            data.get_urm_train_1(). The targetids correspond to its row indexes.
        urm : csr matrix, currently unused (kept for interface compatibility).
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test_1()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        factors : int, number of latent factors
        regularization, iterations, alpha : hyper-parameters forwarded to fit()
        with_scores, export, verbose : bool flags

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations (None if empty)
        """
        start = time.time()

        # Resolve defaults lazily: only load the dataset matrices that are
        # actually needed (the original also loaded an unused ICM/URM).
        urm_train = data.get_urm_train_1() if urm_train is None else urm_train
        urm_test = data.get_urm_test_1() if urm_test is None else urm_test
        targetids = data.get_target_playlists() if targetids is None else targetids

        self.fit(urm=urm_train, factors=factors, regularization=regularization, iterations=iterations, alpha=alpha)
        recs = self.recommend_batch(userids=targetids, with_scores=with_scores, verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time()-start))

        return recs, map10
예제 #4
0
    def run(self, distance, ucm_train=None, urm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0,
            implicit=True, alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True):
        """
        Train the model, recommend for the target users, evaluate the
        recommendations and optionally export a submission file.

        Parameters
        ----------
        distance : str, distance metric
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, all the values under this value are cutted from the final result
        implicit : bool, if true, treat the URM as implicit, otherwise consider explicit ratings (real values) in the URM

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations
        """
        start = time.time()

        # load the dataset defaults, then keep any caller-supplied override
        defaults = (data.get_ucm_train(), data.get_urm_train_1(),
                    data.get_urm_test_1(), data.get_target_playlists())
        ucm_train, urm, urm_test, targetids = [
            given if given is not None else fallback
            for given, fallback in zip((ucm_train, urm, urm_test, targetids), defaults)
        ]

        self.fit(ucm_train, k=k, distance=distance, alpha=alpha, beta=beta, c=c, l=l, shrink=shrink, threshold=threshold, implicit=implicit)
        recs = self.recommend_batch(targetids, urm=urm, with_scores=with_scores, verbose=verbose)

        if recs:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            map10 = None
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name='{}_{}'.format(self.name,distance), verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time()-start))

        return recs, map10
예제 #5
0
파일: CFW.py 프로젝트: keyblade95/RecSys
    def run(self,
            normalize_similarity=False,
            add_zeros_quota=1,
            loss_tolerance=1e-6,
            iteration_limit=30,
            damp_coeff=1,
            use_incremental=False,
            export_results=True,
            export_r_hat=False,
            export_for_validation=False):
        """
        Fit the model on the full (or train) URM, then either export a CSV
        submission or save the estimated ratings matrix R^.
        """
        # train on the train split only when the saved R^ is meant for validation
        urm = d.get_urm_train_1() if (export_r_hat and export_for_validation) else d.get_urm()

        # hyper-parameters shared by fit() and the printable model description
        params = dict(normalize_similarity=normalize_similarity,
                      add_zeros_quota=add_zeros_quota,
                      loss_tolerance=loss_tolerance,
                      iteration_limit=iteration_limit,
                      damp_coeff=damp_coeff,
                      use_incremental=use_incremental)

        self.fit(ICM=d.get_icm(), URM_train=urm, **params)

        if export_results:
            print('exporting results')
            recs = self.recommend_batch(d.get_target_playlists(),
                                        N=10,
                                        urm=urm,
                                        filter_already_liked=True,
                                        with_scores=False,
                                        items_to_exclude=[],
                                        verbose=False)
            importexport.exportcsv(recs, 'submission', self._print(**params))
        elif export_r_hat:
            print('saving estimated urm')
            self.save_r_hat(export_for_validation)
예제 #6
0
파일: CFW.py 프로젝트: keyblade95/RecSys
    def fit(self,
            ICM=None,
            URM_train=None,
            normalize_similarity=True,
            add_zeros_quota=0.1,
            loss_tolerance=0.0001,
            iteration_limit=100,
            damp_coeff=0.1,
            use_incremental=True):
        """
        Learn the feature weights by solving a damped least-squares problem
        (LSQR) over the common features of co-occurring item pairs.

        Parameters
        ----------
        ICM : sparse matrix, item-content matrix. If None, use data.get_icm().
        URM_train : sparse matrix, train URM. If None, use data.get_urm_train_1().
        normalize_similarity : bool, whether to normalize the similarity
        add_zeros_quota : float, quota of zero samples added to the train data
        loss_tolerance : float, LSQR stopping tolerance (used as atol and btol)
        iteration_limit : int, maximum number of LSQR iterations
        damp_coeff : float, LSQR damping (regularization) coefficient
        use_incremental : bool, use the incremental weights instead of the best ones
        """
        # Resolve defaults lazily: evaluating data.get_icm() in the signature
        # would load the matrices once at import time and share them across
        # every later call with default arguments.
        if ICM is None:
            ICM = data.get_icm()
        if URM_train is None:
            URM_train = data.get_urm_train_1()

        self.URM_train = URM_train
        self.ICM = ICM
        self.n_items = self.URM_train.shape[1]
        self.n_users = self.URM_train.shape[0]
        self.n_features = self.ICM.shape[1]
        self.normalize_similarity = normalize_similarity
        self.add_zeros_quota = add_zeros_quota
        self.use_incremental = use_incremental

        self._generateTrainData_low_ram()

        # element-wise product of the content rows of each (row, col) item pair
        common_features = self.ICM[self.row_list].multiply(
            self.ICM[self.col_list])

        linalg_result = linalg.lsqr(common_features,
                                    self.data_list,
                                    show=False,
                                    atol=loss_tolerance,
                                    btol=loss_tolerance,
                                    iter_lim=iteration_limit,
                                    damp=damp_coeff)

        # lsqr returns (x, istop, itn, r1norm, ...): x is the solution,
        # r1norm (index 3) is the residual norm used here as the loss
        self.D_incremental = linalg_result[0].copy()
        self.D_best = linalg_result[0].copy()
        self.epochs_best = 0
        self.loss = linalg_result[3]

        self._compute_W_sparse()
예제 #7
0
                            userids=userids,
                            N=N,
                            filter_already_liked=filter_already_liked,
                            items_to_exclude=items_to_exclude)
                        #evaluate the model with map10
                        map10 = recommender.evaluate(recommendations,
                                                     test_urm=urm_test)
                        if verbose:
                            print('map@10: {}'.format(map10))

                        #write on external files on folder models_validation
                        if write_on_file:
                            out.write(
                                '\n\nl1_ratio: {}\n alpha: {}\n Iterations: {}\n '
                                'topK: {}\n evaluation map@10: {}'.format(
                                    l, a, m, k, map10))


"""
If this file is executed, test the SPLUS distance metric
"""
if __name__ == '__main__':
    rec = SLIMElasticNetRecommender()
    rec.fit(urm=data.get_urm_train_1(),
            max_iter=1,
            topK=400,
            alpha=1e-4,
            l1_ratio=0.5)
    recs = rec.recommend_batch(userids=data.get_target_playlists())
    rec.evaluate(recommendations=recs, test_urm=data.get_urm_test_1())
예제 #8
0
If this file is executed, test the P3alpha recommender
"""
if __name__ == '__main__':
    # Interactive menu for the P3alpha recommender.
    print()
    log.success('++ What do you want to do? ++')
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    log.warning('(v) Validate the model')
    log.warning('(e) Export the submission')
    log.warning('(x) Exit')
    # keep only the first character the user types
    arg = input()[0]
    print()

    if arg == 't':
        # fit on the first train split with default hyper-parameters and
        # evaluate the recommendations against the first test split
        model = P3alphaRecommender(data.get_urm_train_1())
        model.fit(topK=900,
                  alpha=1.2,
                  min_rating=0,
                  implicit=True,
                  normalize_similarity=False)
        recs = model.recommend_batch(data.get_target_playlists())
        evaluate(recs, test_urm=data.get_urm_test_1())
    elif arg == 'r':
        # save the estimated ratings matrix R^
        log.info('Wanna save for evaluation (y/n)?')
        if input()[0] == 'y':
            # NOTE(review): the full URM is used for the *evaluation* path and
            # the train split otherwise — this looks swapped compared to the
            # other scripts in the project; confirm the intent
            model = P3alphaRecommender(data.get_urm())
            path = 'raw_data/saved_r_hat_evaluation/'
        else:
            model = P3alphaRecommender(data.get_urm_train_1())
            path = 'raw_data/saved_r_hat/'
예제 #9
0
import numpy as np
from scipy.sparse import load_npz
import implicit
import pandas as pd
import data.data as data
import scipy.sparse as sps

# load data
targetUsersIds = data.get_target_playlists()

# get item_user matrix by transposing the URM matrix
URM = data.get_urm_train_1()
item_user_data = URM.transpose()
print('> data loaded')

# initialize a model (BM25 metric)
model = implicit.nearest_neighbours.BM25Recommender(K=400, K1=1.5, B=0.3)

# train the model on a sparse matrix of item/user/confidence weights
model.fit(item_user_data)

# estimated ratings for the target users: their URM rows times the learned
# item-item similarity matrix.
# NOTE(review): sps.save_npz requires a sparse matrix — confirm that this
# np.dot call yields one and not a dense ndarray
r_hat = np.dot(URM[targetUsersIds], model.similarity)
sps.save_npz('raw_data/saved_r_hat_evaluation/BM25', r_hat)
"""
# build recommendations array
recommendations = bridge.array_of_recommendations(model, target_user_ids=targetUsersIds, urm=URM)

Export.export(np.array(recommendations), path='../submissions/', name='BM25 K {} K1 {} B{}'.format(K, K1, B))

print('file exported')
예제 #10
0
            log.error('Invalid value of k {}'.format(at_k))
            return

        aps = 0.0
        for r in recommendations:
            row = test_urm.getrow(r[0]).indices
            m = min(at_k, len(row))

            ap = 0.0
            n_elems_found = 0.0
            for j in range(1, m + 1):
                if r[j] in row:
                    n_elems_found += 1
                    ap = ap + n_elems_found / j
            if m > 0:
                ap = ap / m
                aps = aps + ap

        result = aps / len(recommendations)
        if verbose:
            log.warning('MAP: {}'.format(result))
        return result


rec = IALS_numpy()
# train implicit ALS on the first train split
rec.fit(R=data.get_urm_train_1())
# estimated ratings for the target playlists: X[targets] dot Y.T — presumably
# X/Y are the learned user/item latent factor matrices; confirm in IALS_numpy
r_hat = sps.csr_matrix(np.dot(rec.X[data.get_target_playlists()], rec.Y.T))
sps.save_npz('raw_data/saved_r_hat_evaluation/IALS', r_hat)
#recs = rec.recommend_batch(userids=data.get_target_playlists())
#rec.evaluate(recs, data.get_urm_test())
예제 #11
0
파일: MF_BPR.py 프로젝트: keyblade95/RecSys
        if filter_already_liked:
            scores = self._filter_seen_on_scores(userid, scores)

        if len(items_to_exclude) > 0:
            raise NotImplementedError(
                'Items to exclude functionality is not implemented yet')

        relevant_items_partition = (-scores).argpartition(n)[0:n]
        relevant_items_partition_sorting = np.argsort(
            -scores[relevant_items_partition])
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        if with_scores:
            best_scores = scores[ranking]
            return [userid] + [list(zip(list(ranking), list(best_scores)))]
        else:
            return [userid] + list(ranking)


import data.data as d

# Smoke test: train matrix factorization with BPR loss on the train split.
r = MFBPR()
r.fit(d.get_urm_train_1(),
      epochs=300,
      n_factors=300,
      learning_rate=1e-1,
      user_regularization=1e-3,
      positive_item_regularization=1e-3,
      negative_item_regularization=1e-3,
      evaluate_every=1)  # evaluate after every epoch
예제 #12
0

def create_ucm_from_urm(urm_train):
    """
    Build a User-Content Matrix (UCM) by summing, for each playlist, the
    content rows (ICM) of all the tracks it contains, then save it to disk
    as 'raw_data/ucm<random suffix>/ucm.npz'.

    Parameters
    ----------
    urm_train : sparse matrix, URM whose rows are the playlists to process
    """
    path = "raw_data/ucm" + str(randint(1, 100))
    print('starting dataset creation of UCM in ' + path)

    # maybe can be better a dense array?
    ICM = csr_matrix(create_icm(d.get_tracks_df(), []))
    # bug fix: np.int was removed in NumPy 1.24 — use the builtin int dtype.
    # LIL format allows efficient in-place row accumulation below.
    UCM = lil_matrix((d.N_PLAYLISTS, ICM.shape[1]), dtype=int)
    for p in range(d.N_PLAYLISTS):
        # sum the content rows of every track in playlist p
        track_indices = urm_train[p].nonzero()[1]
        for track_id in track_indices:
            UCM[p] += ICM.getrow(track_id)
        log.progressbar(p, d.N_PLAYLISTS)

    # save matrices — save_npz does not support the LIL format, so convert
    # to CSR first (bug fix: saving the lil_matrix directly raises)
    os.makedirs(path, exist_ok=True)
    save_npz(path + '/ucm', UCM.tocsr())

if __name__ == "__main__":    
    urm = d.get_urm_train_1()
    create_ucm_from_urm(urm_train=urm)
예제 #13
0
파일: WF_MF.py 프로젝트: keyblade95/RecSys
        for r in recommendations:
            row = test_urm.getrow(r[0]).indices
            m = min(at_k, len(row))

            ap = 0.0
            n_elems_found = 0.0
            for j in range(1, m + 1):
                if r[j] in row:
                    n_elems_found += 1
                    ap = ap + n_elems_found / j
            if m > 0:
                ap = ap / m
                aps = aps + ap

        result = aps / len(recommendations)
        if verbose:
            log.warning('MAP: {}'.format(result))
        return result


if __name__ == '__main__':
    # Smoke test: factorize the train URM and evaluate MAP on the test split.
    recommender = ProductRecommender()
    hyperparams = dict(latent_features_guess=10,
                       learning_rate=0.01,
                       steps=2,
                       regularization_penalty=0.2,
                       convergeance_threshold=0.01)
    recommender.fit(user_x_product=data.get_urm_train_1(), **hyperparams)
    recommendations = recommender.recommend_batch(data.get_target_playlists())
    recommender.evaluate(recommendations=recommendations,
                         test_urm=data.get_urm_test_1())
예제 #14
0
                  distance=model.SIM_SPLUS,
                  k=600,
                  alpha=0.25,
                  beta=0.5,
                  shrink=10,
                  l=0.25,
                  c=0.5)
        print('Saving the similarity matrix...')
        sps.save_npz(
            'raw_data/saved_sim_matrix_evaluation_2/{}'.format(model.name),
            model.get_sim_matrix())
    elif arg == 'v':
        # model.validate(iterations=10, urm_train=data.get_urm_train_1(), urm_test=data.get_urm_test_1(), targetids=data.get_target_playlists(),
        #          distance=model.SIM_SPLUS, k=(100, 600), alpha=(0,2), beta=(0,2),shrink=(0,100),l=(0,1),c=(0,1))
        model.validate(iterations=10,
                       urm_train=data.get_urm_train_1(),
                       urm_test=data.get_urm_test_1(),
                       targetids=data.get_target_playlists(),
                       distance=model.SIM_RP3BETA,
                       k=(100, 600),
                       alpha=(0, 2),
                       beta=(0, 2),
                       shrink=(0, 100),
                       l=1,
                       c=1)
        #model.test(distance=CFItemBased.SIM_P3ALPHA, k=300,alpha=(0,2),shrink=(0,100))
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')
예제 #15
0
    def run(self,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            factors=100,
            regularization=0.01,
            iterations=100,
            alpha=25,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Run the model and export the results to a file

        NOTE(review): the factors/regularization/iterations/alpha parameters
        and the urm/urm_train arguments are NOT forwarded to fit(), which
        uses hard-coded ElasticNet hyper-parameters — confirm whether this
        is intentional (looks copy-pasted from another recommender).

        Returns
        -------
        :return: recs: (list) recommendations
        :return: map10: (float) MAP10 for the provided recommendations
            (None when there are no recommendations)
        """
        _urm_train = data.get_urm_train_1()
        _urm = data.get_urm()
        _urm_test = data.get_urm_test_1()
        _targetids = data.get_target_playlists()
        # _targetids = data.get_all_playlists()

        start = time.time()

        # fall back on the dataset defaults when no override is given
        # (the unused _icm load was removed)
        urm_train = _urm_train if urm_train is None else urm_train
        urm = _urm if urm is None else urm
        urm_test = _urm_test if urm_test is None else urm_test
        targetids = _targetids if targetids is None else targetids

        self.fit(l1_ratio=0.1,
                 positive_only=True,
                 alpha=1e-4,
                 fit_intercept=False,
                 copy_X=False,
                 precompute=False,
                 selection='random',
                 max_iter=100,
                 topK=100,
                 tol=1e-4,
                 workers=multiprocessing.cpu_count())
        recs = self.recommend_batch(userids=targetids,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
예제 #16
0
def wizard_hybrid():
    """
    Interactive wizard to build, save, export or evaluate a hybrid
    recommender from previously saved similarity / R^ matrices.

    Behaviour depends on the folder the matrices were loaded from:
    - SAVE folders ('saved_sim_matrix', 'saved_r_hat'): ask for weights and
      save the hybrid matrix, or export a CSV submission.
    - EVALUATE folders ('*_evaluation'): run a bayesian search over the
      weights, or save/evaluate/export hand-crafted weights.
    """
    SIM_MATRIX = ['saved_sim_matrix', 'saved_sim_matrix_evaluation']
    R_HAT = ['saved_r_hat', 'saved_r_hat_evaluation']
    SAVE = ['saved_sim_matrix', 'saved_r_hat']
    EVALUATE = ['saved_sim_matrix_evaluation', 'saved_r_hat_evaluation']

    start = time.time()

    matrices_array, folder, models = hb.create_matrices_array()

    print('matrices loaded in {:.2f} s'.format(time.time() - start))
    log.success('You have loaded: {}'.format(models))

    NORMALIZATION_MODE = normalization_mode_selection()

    if folder in SAVE:
        WEIGHTS = weights_selection(models)

        if folder in SIM_MATRIX:
            name, urm_filter_tracks, rel_path = option_selection_save('SIM')
            hybrid_rec = HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
            sps.save_npz('raw_data/' + rel_path + name,
                         hybrid_rec.get_r_hat(weights_array=WEIGHTS))
        if folder in R_HAT:
            name, urm_filter_tracks, rel_path, EXPORT = option_selection_save(
                'R_HAT')
            hybrid_rec = HybridRHat(matrices_array,
                                    normalization_mode=NORMALIZATION_MODE,
                                    urm_filter_tracks=urm_filter_tracks)
            if EXPORT:
                N = ask_number_recommendations()
                recommendations = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                exportcsv(recommendations, path='submission', name=name)
            else:
                sps.save_npz('raw_data/' + rel_path + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))

    elif folder in EVALUATE:
        log.success('|WHAT YOU WANT TO DO ???|')
        log.warning('\'1\' BAYESIAN SEARCH VALIDATION')
        log.warning('\'2\' HAND CRAFTED WEIGHTS')
        mode = input()[0]

        # BAYESIAN SEARCH
        if mode == '1':
            log.success(
                '|SELECT A NUMBER OF |||ITERATIONS||| FOR THE ALGORITHM|')
            # bug fix: an iteration count must be an integer — float(input())
            # would be passed as a non-int to the bayesian-search validate()
            iterations = int(input())
            urm_filter_tracks = data.get_urm_train_1()
            if folder in SIM_MATRIX:
                hybrid_rec = HybridSimilarity(
                    matrices_array,
                    normalization_mode=NORMALIZATION_MODE,
                    urm_filter_tracks=urm_filter_tracks)
            if folder in R_HAT:
                hybrid_rec = HybridRHat(matrices_array,
                                        normalization_mode=NORMALIZATION_MODE,
                                        urm_filter_tracks=urm_filter_tracks)
            hybrid_rec.validate(iterations=iterations,
                                urm_test=data.get_urm_test_1(),
                                userids=data.get_target_playlists())

        # MANUAL WEIGHTS
        elif mode == '2':
            WEIGHTS = weights_selection(models)
            urm_filter_tracks = data.get_urm_train_1()
            chose = option_selection_evaluation_2()  # save, evaluate or csv
            if chose == 's':
                log.success('|CHOSE A NAME FOR THE MATRIX...|')
                name = input()
                # 'matrix_type' (renamed from 'type' to avoid shadowing the builtin)
                if folder in SIM_MATRIX:
                    matrix_type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    matrix_type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)

                sps.save_npz('raw_data/saved_r_hat_evaluation/' + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
                # also save the symmetric hybrid, built on the second train
                # split, for the second evaluation
                sym_rec = symmetric_recommender_creator(
                    models,
                    matrix_type,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                sps.save_npz('raw_data/saved_r_hat_evaluation_2/' + name,
                             sym_rec.get_r_hat(weights_array=WEIGHTS))

            elif chose == 'e':
                if folder in SIM_MATRIX:
                    matrix_type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    matrix_type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                N = ask_number_recommendations()
                print('Recommending...')
                recs = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                hybrid_rec.evaluate(recommendations=recs,
                                    test_urm=data.get_urm_test_1())

                # export the recommendations
                log.success(
                    'Do you want to save the CSV with these recomendations? (y/n)'
                )
                if input()[0] == 'y':
                    export_csv_wizard(recs)

                # evaluate the symmetric hybrid on the second split as well
                sym_rec = symmetric_recommender_creator(
                    models,
                    matrix_type,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                recs2 = sym_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists())
                sym_rec.evaluate(recommendations=recs2,
                                 test_urm=data.get_urm_test_2())

            elif chose == 'c':
                if folder in R_HAT:
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                    N = ask_number_recommendations()
                    print('Recommending...')
                    recs = hybrid_rec.recommend_batch(
                        weights_array=WEIGHTS,
                        target_userids=data.get_target_playlists(),
                        N=N)

                    export_csv_wizard(recs)
                else:
                    log.error('not implemented yet')
    else:
        log.error('WRONG FOLDER')
예제 #17
0
def validate(l1_ratio_array,
             alpha_array,
             max_iter_array,
             topK_array,
             userids=None,
             urm_train=None,
             urm_test=None,
             filter_already_liked=True,
             items_to_exclude=(),
             N=10,
             verbose=True,
             write_on_file=True):
    """
    Grid-search the SLIM ElasticNet hyper-parameters.

    Trains the model on every combination of (l1_ratio, alpha, max_iter,
    topK), evaluates MAP@10 on the test URM and optionally logs each result
    to a timestamped file under validation_results/.

    Parameters
    ----------
    l1_ratio_array, alpha_array, max_iter_array, topK_array : iterables of
        hyper-parameter values to try (full cartesian product)
    userids : list, users to recommend to. If None, use data.get_target_playlists()
    urm_train : sparse matrix, train URM. If None, use data.get_urm_train_1()
    urm_test : sparse matrix, test URM. If None, use data.get_urm_test_1()
    filter_already_liked : bool, filter seen items from the recommendations
    items_to_exclude : iterable, items never to recommend
    N : int, number of recommendations per user
    verbose : bool, print progress and scores
    write_on_file : bool, write the results to the output file

    :return: _
    """
    # Resolve defaults lazily: evaluating data.get_*() in the signature would
    # run at import time, and the mutable [] default would be shared across
    # calls.
    if userids is None:
        userids = data.get_target_playlists()
    if urm_train is None:
        urm_train = data.get_urm_train_1()
    if urm_test is None:
        urm_test = data.get_urm_test_1()
    items_to_exclude = list(items_to_exclude)

    #create the initial model
    recommender = SLIMElasticNetRecommender()

    path = 'validation_results/'
    name = 'slim_rmse'
    folder = time.strftime('%d-%m-%Y')
    filename = '{}/{}/{}{}.csv'.format(path, folder, name,
                                       time.strftime('_%H-%M-%S'))
    # create dir if not exists
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    with open(filename, 'w') as out:
        for l in l1_ratio_array:
            for a in alpha_array:
                for m in max_iter_array:
                    for k in topK_array:

                        #train the model with the parameters
                        if verbose:
                            print(
                                '\n\nTraining slim_rmse with\n l1_ratio: {}\n alpha: {}\n'
                                'Iterations: {}\n topK: {}'.format(l, a, m, k))
                            print('\n training phase...')
                        recommender.fit(urm=urm_train,
                                        l1_ratio=l,
                                        alpha=a,
                                        max_iter=m,
                                        topK=k)

                        #get the recommendations from the trained model
                        recommendations = recommender.recommend_batch(
                            userids=userids,
                            N=N,
                            filter_already_liked=filter_already_liked,
                            items_to_exclude=items_to_exclude)
                        #evaluate the model with map10
                        map10 = recommender.evaluate(recommendations,
                                                     test_urm=urm_test)
                        if verbose:
                            print('map@10: {}'.format(map10))

                        #write on external files on folder models_validation
                        if write_on_file:
                            out.write(
                                '\n\nl1_ratio: {}\n alpha: {}\n Iterations: {}\n '
                                'topK: {}\n evaluation map@10: {}'.format(
                                    l, a, m, k, map10))
예제 #18
0
# #### Explode each row into multiple rows (one per interaction)

#%%
recs_tracks = []
# explode each recommendation row [playlist_id, t1, t2, ...] into one
# (playlist_id, track_id) pair per recommended track
for rec in raw_recs:
    playlist_id = rec[0]
    for t in rec[1:]:
        recs_tracks.append([playlist_id, t])
recs_df = pd.DataFrame(recs_tracks, columns=['playlist_id','track_id'])

#%% [markdown]
# #### Append the 'profile_length' column to the recommendation dataframe

#%%
target_ids = data.get_target_playlists()
targetURM = data.get_urm_train_1()[target_ids]
# profile length = number of train interactions of each target playlist
user_profile_lengths = np.array(targetURM.sum(axis=1)).flatten()
profile_lengths_df = pd.DataFrame({'playlist_id': target_ids, 'profile_length': user_profile_lengths})

#%%
rec_lengths_df = recs_df.merge(profile_lengths_df, on='playlist_id')

#%% [markdown]
# #### Popularity feature

#%%
df = data.get_playlists_df()
# popularity = number of playlists each track appears in
popularity = df.groupby(['track_id']).size().reset_index(name='popularity')

#%%
rec_pop_df = rec_lengths_df.join(popularity.set_index('track_id'), on='track_id')