def main():

    args = cp.command_parser(training_command_parser,
                             cp.predictor_command_parser,
                             EsParse.early_stopping_command_parser)
    predictor = parse.get_predictor(args)

    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)
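    # Dataset-specific training schedules: bigger datasets get a higher
    # minimum number of training iterations and a coarser progress-reporting
    # interval (ml1m = MovieLens 1M, netflix = Netflix Prize; rsc is
    # presumably the RecSys Challenge dataset).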
    if args.dataset == "ml1m":
        args.min_iter = 50000
        args.progress = 5000
    elif args.dataset == 'netflix':
        args.min_iter = 600000
        args.progress = 40000
    elif args.dataset == 'rsc':
        args.min_iter = 800000
        args.progress = 200000

    predictor.prepare_networks(dataset.n_items)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=EsParse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
Example 2
def main():
    args = parse.command_parser(parse.predictor_command_parser,
                                test_command_parser)

    args.training_max_length = args.max_length
    if args.number_of_batches == -1:
        args.number_of_batches = "*"

    dataset = DataHandler(dirname=args.dataset)
    predictor = parse.get_predictor(args)
    predictor.prepare_networks(dataset.n_items)
    file = find_models(predictor, dataset, args)

    output_file = save_file_name(predictor, dataset, args)

    last_tested_batch = get_last_tested_batch(output_file)
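    # Wrap the single model file and its epoch count in one-element lists so
    # the loop below uses the same code path as a multi-checkpoint sweep.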
    batches = [extract_number_of_epochs(file, args)]
    file = [file]
    print(file)

    for i, f in enumerate(file):
        if batches[i] > last_tested_batch:
            evaluator = run_tests(predictor,
                                  f,
                                  dataset,
                                  args,
                                  k=args.nb_of_predictions)
            print('-------------------')
            print('(', i + 1, '/', len(file), ') results on ' + f)
            print_results(evaluator,
                          args.metrics.split(','),
                          file=output_file,
                          n_batches=batches[i])
def main():
	
	args = parse.command_parser(parse.predictor_command_parser, test_command_parser)

	args.training_max_length = args.max_length
	# args.max_length = int(DATA_HANDLER.max_length/2)
	if args.number_of_batches == -1:
		args.number_of_batches = "*"

	dataset = DataHandler(dirname=args.dataset)
	predictor = parse.get_predictor(args)
	predictor.prepare_model(dataset)
	file = find_models(predictor, dataset, args)

	if args.number_of_batches == "*" and args.method != "UKNN" and args.method != "MM" and args.method != "POP":
		
		output_file = save_file_name(predictor, dataset, args)

		last_tested_batch = get_last_tested_batch(output_file)
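		# Sort checkpoints by epoch count and only evaluate the ones newer
		# than the last batch already recorded in the output file, so an
		# interrupted sweep can resume where it left off.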
		batches = np.array(list(map(extract_number_of_epochs, file)))  # list() needed: map() is lazy in Python 3
		sorted_ids = np.argsort(batches)
		batches = batches[sorted_ids]
		file = file[sorted_ids]
		for i, f in enumerate(file):
			if batches[i] > last_tested_batch:
				evaluator = run_tests(predictor, f, dataset, args, get_full_recommendation_list=args.save_rank, k=args.nb_of_predictions)
				print('-------------------')
				print('(',i+1 ,'/', len(file),') results on ' + f)
				print_results(evaluator, args.metrics.split(','), plot=False, file=output_file, n_batches=batches[i], print_full_rank_comparison=args.save_rank)
	else:
		evaluator = run_tests(predictor, file, dataset, args, get_full_recommendation_list=args.save_rank, k=args.nb_of_predictions)
		print_results(evaluator, args.metrics.split(','), file=save_file_name(predictor, dataset, args), print_full_rank_comparison=args.save_rank)
Example 5
def main():

    args = parse.command_parser(parse.predictor_command_parser,
                                training_command_parser,
                                parse.early_stopping_command_parser)

    predictor = parse.get_predictor(args)

    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)

    predictor.prepare_model(dataset)
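    # num() is presumably a helper that parses the --progress string into an
    # int or float; the helper itself is not shown in these examples.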
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    time_based_progress=args.time_based_progress,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=parse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
Example 6
def main():
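    # The CLI arguments are hard-coded by appending to sys.argv, so the script
    # can be run without a command line; the commented blocks below switch
    # between the RNN variants (cluster / one-hot / margin / sampling).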
    sys.argv.extend(['--tshuffle', '--load_last_model',  # '--extended_set',
                     '-d', 'datasets/',
                     '--save', 'Best',
                     '--progress', '200', '--mpi', '1000.0',
                     '--max_iter', '6000.0', '--max_time', '28800.0', '--min_iter', '100.0',
                     '--es_m', 'StopAfterN', '--es_n', '3',
                     '-m', 'RNN', '--r_t', 'GRU', '--r_l', '100-50',
                     '--u_m', 'rmsprop',
                     '--rf'])
    # ####################################################
    # # for RNNCluster
    # sys.argv.extend(['--dir', 'RNNCluster_',
    #                  '--metrics', 'recall,cluster_recall,sps,cluster_sps,ignored_items,assr',
    #                  '--loss', 'BPR', '--clusters', '10'])
    # ####################################################
    # for RNNOneHot
    sys.argv.extend(['--dir', 'RNNOneHot_',
                     '--metrics', 'recall,sps',  # ,ndcg,item_coverage,user_coverage,blockbuster_share
                     '--loss', 'CCE'])
    # ####################################################
    # # for RNNMargin
    # sys.argv.extend(['--dir', 'RNNMargin_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'logit'])
    # ####################################################
    # # for RNNSampling
    # sys.argv.extend(['--dir', 'RNNSampling_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'BPR'])
    # ####################################################
    # # without MOVIES_FEATURES
    # sys.argv.extend(['--r_emb', '100'])
    # # with MOVIES_FEATURES
    sys.argv.extend(['--mf'])
    # ####################################################

    args = parse.command_parser(parse.predictor_command_parser, training_command_parser,
                                early_stopping.early_stopping_command_parser)

    predictor = parse.get_predictor(args)

    dataset = DataHandler(dirname=args.dataset, extended_training_set=args.extended_set, shuffle_training=args.tshuffle)

    if args.mf:
        predictor.load_movies_features(dirname=dataset.dirname)

    predictor.prepare_model(dataset)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    time_based_progress=args.time_based_progress,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=early_stopping.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
Example 7
def main():

	args = cp.command_parser(training_command_parser, cp.predictor_command_parser)
	predictor = parse.get_predictor(args)

	dataset = DataHandler(dirname=args.dataset, extended_training_set=args.extended_set, shuffle_training=args.tshuffle)
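	# One timestamped log directory per run, so successive runs appear side
	# by side in TensorBoard.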
	logdir = os.path.join(os.getcwd(),"logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
	tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

	predictor.prepare_networks(dataset.n_items)

	predictor.train(dataset,
		tensorboard_callback=tensorboard_callback,
		autosave=args.save,
		save_dir=dataset.dirname + "/models/" + args.dir,
		n_epoch=args.n_epoch,
		validation_metrics=args.metrics.split(','))
def main():

	
	args = parse.command_parser(parse.predictor_command_parser, training_command_parser, parse.early_stopping_command_parser)

	predictor = parse.get_predictor(args)

	
	dataset = DataHandler(dirname=args.dataset, extended_training_set=args.extended_set, shuffle_training=args.tshuffle)

	predictor.prepare_model(dataset)
	predictor.train(dataset,
		save_dir=dataset.dirname + "models/" + args.dir,
		time_based_progress=args.time_based_progress,
		progress=num(args.progress),
		autosave=args.save,
		max_progress_interval=args.mpi,
		max_iter=args.max_iter,
		min_iterations=args.min_iter,
		max_time=args.max_time,
		early_stopping=parse.get_early_stopper(args),
		load_last_model=args.load_last_model,
		validation_metrics=args.metrics.split(','))
Example 9
    def fit(self,
            df_ratings=None,
            columns=['userId', 'itemId', 'rating'],
            verbose=False,
            **kwargs):

        self.columns = np.array(columns)
        # If Surprise lib is the base package to fit, then df_ratings must be used.
        # Algorithms that use Surprise Lib: NMF, SVD, KNN, SVDpp

        if (df_ratings is not None):
            self.df_ratings = df_ratings.copy()

        ###########################################
        # TODO: convert a utility matrix to df_ratings when a utility matrix
        # is passed instead of a ratings dataframe.
        ###########################################

        if self.name in self.surprise_algorithms:  # Surprise-based recommenders
            from surprise import Dataset
            from surprise import Reader

            # A reader is still needed but only the rating_scale param is required.
            # The Reader class is used to parse a file containing ratings.
            reader = Reader(rating_scale=(0.5, 5.0))

            # Separating timestamp column
            if ('timestamp' in columns):
                self.df_timestamp = self.df_ratings['timestamp'].copy()
                self.df_ratings.drop(labels='timestamp', inplace=True, axis=1)

            # The columns must correspond to user id, item id and ratings (in that order).
            data = Dataset.load_from_df(
                self.df_ratings[self.columns[np.where(
                    self.columns != 'timestamp')]], reader)

            # Creating the trainset to be used in Surprise's prediction functions
            self.trainset = data.build_full_trainset()

            # Creating Model
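            # Each branch below reads its hyperparameters from **kwargs and
            # falls back to the Surprise defaults when a key is missing.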
            if self.name == 'svd':
                from surprise import SVD

                # Setting Number of Factors in Matrix Factorization
                if ('n_factors' in kwargs):
                    self.n_factors = kwargs['n_factors']
                else:
                    self.n_factors = 100
                    if (verbose):
                        print("Using default number of factors: {}".format(
                            self.n_factors))

                # Setting number of epochs in stochastic gradient descent
                if ('n_epochs' in kwargs):
                    self.n_epochs = kwargs['n_epochs']
                else:
                    self.n_epochs = 20
                    if (verbose):
                        print("Using default number of epochs: {}".format(
                            self.n_epochs))

                self.model = SVD(n_factors=self.n_factors,
                                 n_epochs=self.n_epochs,
                                 verbose=verbose)

            elif self.name == 'nmf':
                from surprise import NMF

                # Setting Number of Factors in Matrix Factorization
                if ('n_factors' in kwargs):
                    self.n_factors = kwargs['n_factors']
                else:
                    self.n_factors = 15
                    if (verbose):
                        print("Using default number of factors: {}".format(
                            self.n_factors))

                # Setting number of epochs in stochastic gradient descent
                if ('n_epochs' in kwargs):
                    self.n_epochs = kwargs['n_epochs']
                else:
                    self.n_epochs = 50
                    if (verbose):
                        print("Using default number of epochs: {}".format(
                            self.n_epochs))

                self.model = NMF(n_factors=self.n_factors,
                                 n_epochs=self.n_epochs,
                                 verbose=verbose)

            elif self.name == 'knnbasic':
                from surprise import KNNBasic

                # Setting number of neighbours
                if ('k' in kwargs):
                    self.k = kwargs['k']
                else:
                    self.k = 40
                    if (verbose):
                        print("Using default k: {}".format(self.k))

                # Setting minimum number of neighbours
                if ('k_min' in kwargs):
                    self.k_min = kwargs['k_min']
                else:
                    self.k_min = 1
                    if (verbose):
                        print("Using default k_min: {}".format(1))

                self.model = KNNBasic(k=self.k,
                                      min_k=self.k_min,
                                      verbose=verbose)

            elif self.name == 'kmeans':
                from surprise import KNNWithMeans

                # Setting number of neighbours
                if ('k' in kwargs):
                    self.k = kwargs['k']
                else:
                    self.k = 40
                    if (verbose):
                        print("Using default k: {}".format(40))

                # Setting minimum number of neighbours
                if ('k_min' in kwargs):
                    self.k_min = kwargs['k_min']
                else:
                    self.k_min = 1
                    if (verbose):
                        print("Using default k_min: {}".format(1))

                self.model = KNNWithMeans(k=self.k,
                                          min_k=self.k_min,
                                          verbose=verbose)

            else:
                if (verbose):
                    print("Algorithm not configured: {}".format(self.name))
                return -1

            # Train the algorithm on the trainset. Surprise's AlgoBase API
            # is fit() (train() was deprecated in v1.0.5 and later removed).
            self.model.fit(self.trainset)

            return 0

        elif (self.name in self.devooght_algorithms):

            # Arguments
            directory_path = os.path.join(
                '.', 'Sequence_based_recommendation_files', self.name)
            preprocess.create_dirs(dirname=directory_path, verbose=verbose)
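            # Preprocessing pipeline for the sequence-based recommenders:
            # drop rare users/items, remap ids, split into train/val/test,
            # and write the sequence files plus dataset statistics.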

            data = preprocess.remove_rare_elements(data=df_ratings,
                                                   min_user_activity=1,
                                                   min_item_popularity=1,
                                                   verbose=verbose)

            data = preprocess.save_index_mapping(data=data,
                                                 dirname=directory_path,
                                                 separator=',')

            train_set, val_set, test_set = preprocess.split_data(
                data=data,
                nb_val_users=0.1,  # val_size
                nb_test_users=0.1,  # test_size
                dirname=directory_path,
                verbose=verbose)

            preprocess.make_sequence_format(train_set=train_set,
                                            val_set=val_set,
                                            test_set=test_set,
                                            dirname=directory_path,
                                            verbose=verbose)

            preprocess.save_data_stats(data=data,
                                       train_set=train_set,
                                       val_set=val_set,
                                       test_set=test_set,
                                       dirname=directory_path,
                                       verbose=verbose)

            # Training Algorithm
            parser = parse.command_parser(parse.predictor_command_parser,
                                          train.training_command_parser,
                                          parse.early_stopping_command_parser)

            if self.name == 'fism':
                args = parser.parse_args([
                    '--dir',
                    os.path.join(directory_path, 'models'),
                    '-d',
                    directory_path,  #directory_path + '/', 
                    '-b',
                    '20',  # Batch size: the number of training examples in a single batch
                    '--max_iter',
                    '50',  # Maximum number of training iterations (batches)
                    '--progress',
                    '10',  # How often progress information is printed during training
                    '-m',
                    self.name.upper(),  # Method
                    #'-i', '-1', # Number of batches - only on test parser
                    '--loss',
                    'RMSE',
                    '--save',
                    'Best'
                ])

                self.model = parse.get_predictor(args)

                dataset = handler.DataHandler(
                    dirname=args.dataset,
                    extended_training_set=args.extended_set,
                    shuffle_training=args.tshuffle)

                self.model.prepare_model(dataset)
                self.metrics = self.model.train(
                    dataset,
                    save_dir=args.dir,
                    time_based_progress=args.time_based_progress,
                    progress=float(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=parse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))

            else:
                if (verbose):
                    print("Algorithm not configured: {}".format(self.name))
                return -1

            return 0

        else:  # self.name is in neither the Surprise nor the Devooght family
            if (verbose):
                print("Invalid algorithm: {}".format(self.name))
            return -1
def main():
    sys.argv.extend([
        '-d', 'datasets/', '-k', '3', '--save', '-m', 'RNN', '--r_t', 'GRU',
        '--r_l', '100-50', '--u_m', 'rmsprop', '--rf'
    ])
    # ####################################################
    # # for RNNCluster
    # sys.argv.extend(['--dir', 'RNNCluster_',
    #                  '--metrics', 'recall,sps,assr',
    #                  '--loss', 'BPR', '--clusters', '10'])
    # ####################################################
    # for RNNOneHot
    sys.argv.extend([
        '--dir', 'RNNOneHot_', '--metrics',
        'sps,item_coverage,user_coverage,recall', '--loss', 'CCE'
    ])
    # ####################################################
    # # for RNNMargin
    # sys.argv.extend(['--dir', 'RNNMargin_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'logit'])
    # ####################################################
    # # for RNNSampling
    # sys.argv.extend(['--dir', 'RNNSampling_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'BPR'])
    # ####################################################
    # # without MOVIES_FEATURES
    # sys.argv.extend(['--r_emb', '100'])
    # with MOVIES_FEATURES
    sys.argv.extend(['--mf'])
    # ####################################################

    args = parse.command_parser(parse.predictor_command_parser,
                                test_command_parser)

    args.training_max_length = args.max_length
    # args.max_length = int(DATA_HANDLER.max_length/2)
    if args.number_of_batches == -1:
        args.number_of_batches = "*"

    dataset = DataHandler(dirname=args.dataset)
    predictor = parse.get_predictor(args)

    if args.mf:
        predictor.load_movies_features(dirname=dataset.dirname)

    predictor.prepare_model(dataset)
    file = find_models(predictor, dataset, args)

    if args.number_of_batches == "*" and args.method != "UKNN" and args.method != "MM" and args.method != "POP":

        output_file = save_file_name(predictor, dataset, args)

        last_tested_batch = get_last_tested_batch(output_file)
        batches = np.array(list(map(extract_number_of_epochs, file)))
        sorted_ids = np.argsort(batches)
        batches = batches[sorted_ids]
        file = file[sorted_ids]
        for i, f in enumerate(file):
            if batches[i] > last_tested_batch:
                evaluator = run_tests(
                    predictor,
                    f,
                    dataset,
                    args,
                    get_full_recommendation_list=args.save_rank,
                    k=args.nb_of_predictions)
                print('-------------------')
                print('(', i + 1, '/', len(file), ') results on ' + f)
                print_results(evaluator,
                              args.metrics.split(','),
                              plot=False,
                              file=output_file,
                              n_batches=batches[i],
                              print_full_rank_comparison=args.save_rank)
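                # Unlike the earlier test loops, this variant stops after the
                # first untested checkpoint instead of sweeping all of them.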
                break
    else:
        evaluator = run_tests(predictor,
                              file,
                              dataset,
                              args,
                              get_full_recommendation_list=args.save_rank,
                              k=args.nb_of_predictions)
        print_results(evaluator,
                      args.metrics.split(','),
                      file=save_file_name(predictor, dataset, args),
                      print_full_rank_comparison=args.save_rank)