Ejemplo n.º 1
0
    #                         if verbose:
    #                             print('map@10: {}'.format(map10))
    #
    #                         #write on external files on folder models_validation
    #                         if write_on_file:
    #                             out.write('\n\nFactors: {}\n Regulatization: {}\n Iterations: {}\n '
    #                                       'Alpha_val: {}\n evaluation map@10: {}'.format(f, r, i, a, map10))
    #

"""
If this file is executed, fit the ALS model and save its R^ matrix
"""

if __name__ == '__main__':
    # Script entry point: train ALS on the second train split and store the
    # estimated ratings matrix (R^) as a sparse .npz file.
    fit_params = dict(factors=500, regularization=0.5, iterations=200, alpha=25)

    model = AlternatingLeastSquare()
    model.fit(urm=data.get_urm_train_2(), **fit_params)

    r_hat = model.get_r_hat()
    sps.save_npz('raw_data/saved_r_hat_evaluation_2/als', r_hat)
"""
    print()
    log.success('++ What do you want to do? ++')
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    arg = input()[0]
    print()
    
    model = AlternatingLeastSquare()
    if arg == 't':
        model.fit(urm=data.get_urm_train_2(), factors=1500, regularization=0.05, iterations=10, alpha=25)
        recs = model.recommend_batch(userids=data.get_target_playlists())
Ejemplo n.º 2
0
    model = CFUserBased()
    if arg == 't':
        # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
        # model.evaluate(recommendations=recs, test_urm=data.get_urm_test())
        model.test(distance=CFUserBased.SIM_SPLUS,
                   k=600,
                   alpha=0.25,
                   beta=0.5,
                   shrink=10,
                   l=0.25,
                   c=0.5)
    elif arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        choice = input()[0] == 'y'
        model.fit(data.get_urm_train_2(),
                  distance=model.SIM_SPLUS,
                  k=400,
                  alpha=0.25,
                  beta=0.5,
                  shrink=0,
                  l=0.25,
                  c=0.25)
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        model.fit(data.get_urm_train_2(),
                  distance=model.SIM_SPLUS,
                  k=400,
                  alpha=0.25,
                  beta=0.5,
Ejemplo n.º 3
0
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    arg = input()[0]
    print()
    
    model = ContentBasedRecommender()
    if arg == 't':
        # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
        # model.evaluate(recommendations=recs, test_urm=data.get_urm_test())
        model.test(distance=model.SIM_SPLUS, k=500,alpha=0.75,beta=1,shrink=500,l=0.5,c=0.5)
    elif arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        choice = input()[0] == 'y'
        model.fit(urm=data.get_urm_train_2(),icm=data.get_icm(), distance=model.SIM_SPLUS,k=500,shrink=500,alpha=0.75,beta=1,l=0.5,c=0.5)
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        model.fit(urm=data.get_urm_train_2(),icm=data.get_icm(), distance=model.SIM_SPLUS,k=500,shrink=500,alpha=0.75,beta=1,l=0.5,c=0.5)
        print('Saving the similarity matrix...')
        sps.save_npz('raw_data/saved_sim_matrix_evaluation_2/{}'.format(model.name), model.get_sim_matrix())
    # elif arg == 'v':
    #     model.validate(....)
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')

    # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
    # recs_seq = model.recommend_batch(userids=data.get_sequential_target_playlists(), urm=data.get_urm_train())
Ejemplo n.º 4
0
if __name__ == '__main__':
    # Interactive entry point for the user-based KNN collaborative filtering
    # recommender: print a small menu, read a one-letter choice and run it.
    print()
    log.success('++ What do you want to do? ++')
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    # Slice instead of indexing: an empty answer gives '' instead of raising
    # IndexError, so it is handled like any other unknown option.
    arg = input().strip()[:1]
    print()

    if arg == 't':
        # NOTE(review): unlike the 'r' and 's' branches, fit() is not called
        # before recommending here — confirm recommend_batch fits internally.
        model = UserKNNCFRecommender(URM_train=data.get_urm_train_2())
        recs = model.recommend_batch(userids=data.get_target_playlists(), type='USER')
        model.evaluate(recs, test_urm=data.get_urm_test_2())
    elif arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        # Anything other than an answer starting with 'y' counts as "no".
        choice = input().strip()[:1] == 'y'
        model = UserKNNCFRecommender(URM_train=data.get_urm_train_2())
        model.fit()
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        model = UserKNNCFRecommender(URM_train=data.get_urm_train_2())
        model.fit()
        print('Saving the similarity matrix...')
        sps.save_npz('raw_data/saved_sim_matrix_evaluation/{}'.format(model.name), model.W_sparse)
    # elif arg == 'v':
Ejemplo n.º 5
0
                        out.write('\n\nFactors: {}\n Iteration: {}\n evaluation map@10: {}'.format(f, i, map10))



"""
If this file is executed, run the interactive Pure SVD menu (save the R^ or validate the model)
"""
if __name__ == '__main__':
    # Interactive entry point for the Pure SVD model: print a small menu,
    # read a one-letter choice and either save the R^ or run the validation.
    print()
    log.success('++ What do you want to do? ++')
    # NOTE(review): '(t)' is advertised but no branch below handles it, so it
    # ends in 'Wrong option!' — confirm whether a test branch is missing.
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    # Slice instead of indexing: an empty answer gives '' (reported below as a
    # wrong option) instead of raising IndexError.
    arg = input().strip()[:1]
    print()

    model = Pure_SVD()
    if arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        # Anything other than an answer starting with 'y' counts as "no".
        choice = input().strip()[:1] == 'y'
        model.fit(urm_train=data.get_urm_train_2(), num_factors=700, iteration=1)
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 'v':
        model.validate(factors_array=[600, 640, 700, 730, 800, 860, 1000], iteration_array=[1, 2, 5])
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')
Ejemplo n.º 6
0
    def fit(self,
            URM_train=d.get_urm_train_2(),
            epochs=70,
            URM_test=d.get_urm_test_2(),
            user_ids=d.get_target_playlists(),
            batch_size=1000,
            validate_every_N_epochs=2,
            start_validation_after_N_epochs=71,
            lambda_i=0.0,
            lambda_j=0.0,
            learning_rate=0.01,
            topK=1500,
            sgd_mode='adagrad'):
        """
        Train the model, i.e. learn the matrix W.

        The method stores the training matrix and its dimensions on the
        instance, creates the Cython epoch runner and then hands control to
        `_fit_alreadyInitialized`, which runs the actual training.

        :param URM_train(csr_matrix) the URM used to train the model. Either the full or the validation one
        :param epochs(int) number of training epochs
        :param URM_test(csr_matrix) needed if we'd like to perform validation
        :param user_ids(list) needed if we'd like to perform validation
        :param batch_size(int) after how many items the params should be updated
        :param validate_every_N_epochs(int) how often the MAP evaluation should be displayed
        :param start_validation_after_N_epochs(int) forwarded as-is to the training routine
        :param lambda_i(float) first regularization term
        :param lambda_j(float) second regularization term
        :param learning_rate(float) algorithm learning rate
        :param topK(int) how many elements should be taken into account while computing URM*W
        :param sgd_mode(string) optimization algorithm
        """
        # Keep the training matrix and its dimensions on the instance.
        self.URM_train = URM_train
        matrix_shape = URM_train.shape
        self.n_users = matrix_shape[0]
        self.n_items = matrix_shape[1]

        self.sgd_mode = sgd_mode

        # Imported here, as in the original code, so the compiled extension is
        # only required when fit() is actually called.
        from cythoncompiled.SLIM_BPR.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # NOTE(review): the epoch runner always gets batch_size=1; the
        # `batch_size` argument is only forwarded to the training routine
        # below — confirm this is intended.
        epoch_options = dict(sparse_weights=False,
                             topK=topK,
                             learning_rate=learning_rate,
                             li_reg=lambda_i,
                             lj_reg=lambda_j,
                             batch_size=1,
                             symmetric=True,
                             sgd_mode=sgd_mode)
        self.cythonEpoch = SLIM_BPR_Cython_Epoch(self.URM_train, **epoch_options)

        # Delegate the training loop to the already-initialized fit routine.
        training_options = dict(epochs=epochs,
                                logFile=None,
                                URM_test=URM_test,
                                user_ids=user_ids,
                                filterTopPop=False,
                                minRatingsPerUser=1,
                                batch_size=batch_size,
                                validate_every_N_epochs=validate_every_N_epochs,
                                start_validation_after_N_epochs=start_validation_after_N_epochs,
                                lambda_i=lambda_i,
                                lambda_j=lambda_j,
                                learning_rate=learning_rate,
                                topK=topK)
        self._fit_alreadyInitialized(**training_options)
Ejemplo n.º 7
0
if __name__ == '__main__':
    # Interactive entry point for the SLIM BPR model: print a small menu,
    # read a one-letter choice and save either the R^ or the similarity matrix.
    print()
    log.success('++ What do you want to do? ++')
    # NOTE(review): '(t)' is advertised but no branch below handles it, so it
    # ends in 'Wrong option!' — confirm whether a test branch is missing.
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    # Slice instead of indexing: an empty answer gives '' (reported below as a
    # wrong option) instead of raising IndexError.
    arg = input().strip()[:1]
    print()

    model = SLIM_BPR()
    if arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        # Anything other than an answer starting with 'y' counts as "no".
        choice = input().strip()[:1] == 'y'
        model.fit(URM_train=data.get_urm_train_2(),
                  URM_test=data.get_urm_test_2())
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        # fit() is called without arguments here, so its defaults are used.
        model.fit()
        print('Saving the similarity matrix...')
        sps.save_npz(
            'raw_data/saved_sim_matrix_evaluation_2/{}'.format(model.name),
            model.get_sim_matrix())
    # elif arg == 'v':
    #     model.validate(....)
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')
Ejemplo n.º 8
0
def wizard_hybrid():
    """
    Interactive wizard that combines previously saved matrices into a hybrid
    recommender.

    The matrices, the folder they come from and the model names are obtained
    from `hb.create_matrices_array()`. What happens next depends on the folder:

    - a "save" folder ('saved_sim_matrix' / 'saved_r_hat'): ask for weights and
      either save the hybrid R^ or (R^ folders only) export a CSV submission;
    - an "evaluation" folder ('saved_sim_matrix_evaluation' /
      'saved_r_hat_evaluation'): either run the Bayesian search validation or
      use hand-crafted weights to save, evaluate or export recommendations;
    - anything else: log an error.

    The function only interacts through stdin/stdout, the logger and the
    files it writes; it returns nothing.
    """
    SIM_MATRIX = ['saved_sim_matrix', 'saved_sim_matrix_evaluation']
    R_HAT = ['saved_r_hat', 'saved_r_hat_evaluation']
    SAVE = ['saved_sim_matrix', 'saved_r_hat']
    EVALUATE = ['saved_sim_matrix_evaluation', 'saved_r_hat_evaluation']

    def read_choice():
        """Read a one-character answer; an empty line yields '' (no IndexError)."""
        return input().strip()[:1]

    start = time.time()

    matrices_array, folder, models = hb.create_matrices_array()

    print('matrices loaded in {:.2f} s'.format(time.time() - start))
    log.success('You have loaded: {}'.format(models))

    NORMALIZATION_MODE = normalization_mode_selection()

    def build_hybrid(urm_filter_tracks):
        """Return (kind, recommender) for the loaded folder; kind is 'SIM' or 'R_HAT'."""
        if folder in SIM_MATRIX:
            return 'SIM', HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
        # Only called for evaluation folders, which are all either SIM or R_HAT.
        return 'R_HAT', HybridRHat(
            matrices_array,
            normalization_mode=NORMALIZATION_MODE,
            urm_filter_tracks=urm_filter_tracks)

    if folder in SAVE:
        WEIGHTS = weights_selection(models)

        if folder in SIM_MATRIX:
            name, urm_filter_tracks, rel_path = option_selection_save('SIM')
            hybrid_rec = HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
            sps.save_npz('raw_data/' + rel_path + name,
                         hybrid_rec.get_r_hat(weights_array=WEIGHTS))
        elif folder in R_HAT:
            # For R^ folders the selection also says whether to export a CSV.
            name, urm_filter_tracks, rel_path, EXPORT = option_selection_save(
                'R_HAT')
            hybrid_rec = HybridRHat(matrices_array,
                                    normalization_mode=NORMALIZATION_MODE,
                                    urm_filter_tracks=urm_filter_tracks)
            if EXPORT:
                N = ask_number_recommendations()
                recommendations = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                exportcsv(recommendations, path='submission', name=name)
            else:
                sps.save_npz('raw_data/' + rel_path + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))

    elif folder in EVALUATE:
        log.success('|WHAT YOU WANT TO DO ???|')
        log.warning('\'1\' BAYESIAN SEARCH VALIDATION')
        log.warning('\'2\' HAND CRAFTED WEIGHTS')
        mode = read_choice()

        # BAYESIAN SEARCH
        if mode == '1':
            log.success(
                '|SELECT A NUMBER OF |||ITERATIONS||| FOR THE ALGORITHM|')
            # NOTE(review): the iteration count is parsed as a float — confirm
            # that validate() accepts a non-integer value.
            iterations = float(input())
            _, hybrid_rec = build_hybrid(data.get_urm_train_1())
            hybrid_rec.validate(iterations=iterations,
                                urm_test=data.get_urm_test_1(),
                                userids=data.get_target_playlists())

        # MANUAL WEIGHTS
        elif mode == '2':
            WEIGHTS = weights_selection(models)
            urm_filter_tracks = data.get_urm_train_1()
            chose = option_selection_evaluation_2()  # save, evaluate or csv
            if chose == 's':
                log.success('|CHOSE A NAME FOR THE MATRIX...|')
                name = input()
                rec_kind, hybrid_rec = build_hybrid(urm_filter_tracks)

                # Save the R^ for the first split, then for the second one
                # using the recommender built on urm_train_2.
                sps.save_npz('raw_data/saved_r_hat_evaluation/' + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
                sym_rec = symmetric_recommender_creator(
                    models,
                    rec_kind,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                sps.save_npz('raw_data/saved_r_hat_evaluation_2/' + name,
                             sym_rec.get_r_hat(weights_array=WEIGHTS))

            elif chose == 'e':
                rec_kind, hybrid_rec = build_hybrid(urm_filter_tracks)
                N = ask_number_recommendations()
                print('Recommending...')
                recs = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                hybrid_rec.evaluate(recommendations=recs,
                                    test_urm=data.get_urm_test_1())

                # export the recommendations
                log.success(
                    'Do you want to save the CSV with these recomendations? (y/n)'
                )
                if read_choice() == 'y':
                    export_csv_wizard(recs)

                # Repeat the evaluation on the second split.
                sym_rec = symmetric_recommender_creator(
                    models,
                    rec_kind,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                recs2 = sym_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists())
                sym_rec.evaluate(recommendations=recs2,
                                 test_urm=data.get_urm_test_2())

            elif chose == 'c':
                if folder in R_HAT:
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                    N = ask_number_recommendations()
                    print('Recommending...')
                    recs = hybrid_rec.recommend_batch(
                        weights_array=WEIGHTS,
                        target_userids=data.get_target_playlists(),
                        N=N)

                    export_csv_wizard(recs)
                else:
                    log.error('not implemented yet')
            else:
                # Previously an unknown action was ignored without feedback.
                log.error('WRONG OPTION')
        else:
            # Previously an unknown (or empty) mode was ignored without feedback.
            log.error('WRONG OPTION')
    else:
        log.error('WRONG FOLDER')
Ejemplo n.º 9
0
    print()
    log.success('++ What do you want to do? ++')
    log.warning('(t) Test the model with some default params')
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    arg = input()[0]
    print()
    
    model = CFContentUserBased()
    if arg == 't':
        model.test(k=60, distance=model.SIM_SPLUS, shrink=10, alpha=0.55, beta=0.75, l=0.25, c=0.25)
    elif arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        choice = input()[0] == 'y'
        model.fit(data.get_urm_train_2(), distance=model.SIM_SPLUS,k=400,alpha=0.25,beta=0.5,shrink=0,l=0.25,c=0.25)
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        model.fit(data.get_urm_train_2(), distance=model.SIM_SPLUS,k=400,alpha=0.25,beta=0.5,shrink=0,l=0.25,c=0.25)
        print('Saving the similarity matrix...')
        sps.save_npz('raw_data/saved_sim_matrix_evaluation/{}'.format(model.name), model.get_sim_matrix())
    # elif arg == 'v':
    #     model.validate(iterations=50, urm_train=data.get_urm_train(), urm_test=data.get_urm_test(), targetids=data.get_target_playlists(),
    #         distance=model.SIM_P3ALPHA, k=(100, 600), alpha=(0,1), beta=(0, 1),shrink=(0,100),l=(0,1),c=(0,1))
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')