def main():
    args = cp.command_parser(training_command_parser,
                             cp.predictor_command_parser,
                             EsParse.early_stopping_command_parser)
    predictor = parse.get_predictor(args)
    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)

    # Dataset-specific defaults for the minimum number of iterations and the
    # progress-report interval.
    if args.dataset == "ml1m":
        args.min_iter = 50000
        args.progress = 5000
    elif args.dataset == 'netflix':
        args.min_iter = 600000
        args.progress = 40000
    elif args.dataset == 'rsc':
        args.min_iter = 800000
        args.progress = 200000

    predictor.prepare_networks(dataset.n_items)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=EsParse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
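# Example invocation (hypothetical script name, dataset name and flag values;
# the flags themselves come from the training, predictor and early-stopping
# parsers composed above):
#
#   python train.py -d datasets/ml1m/ -m RNN --loss CCE --save Best --metrics sps,recall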
def main():
    args = parse.command_parser(parse.predictor_command_parser, test_command_parser)
    args.training_max_length = args.max_length
    if args.number_of_batches == -1:
        args.number_of_batches = "*"

    dataset = DataHandler(dirname=args.dataset)
    predictor = parse.get_predictor(args)
    predictor.prepare_networks(dataset.n_items)

    file = find_models(predictor, dataset, args)
    output_file = save_file_name(predictor, dataset, args)
    last_tested_batch = get_last_tested_batch(output_file)
    batches = [extract_number_of_epochs(file, args)]
    file = [file]
    print(file)
    for i, f in enumerate(file):
        if batches[i] > last_tested_batch:
            evaluator = run_tests(predictor, f, dataset, args, k=args.nb_of_predictions)
            print('-------------------')
            print('(', i + 1, '/', len(file), ') results on ' + f)
            print_results(evaluator, args.metrics.split(','),
                          file=output_file, n_batches=batches[i])
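# A minimal sketch of what a helper like get_last_tested_batch could do
# (hypothetical implementation: it assumes each line of the results file
# starts with the batch count recorded by print_results; the real helper
# may use a different file layout):
def get_last_tested_batch_sketch(output_file):
    # Returns 0.0 when there are no results yet, so every checkpoint gets tested.
    last_batch = 0.0
    try:
        with open(output_file) as f:
            for line in f:
                fields = line.split()
                if fields:
                    last_batch = max(last_batch, float(fields[0]))
    except (IOError, ValueError):
        pass
    return last_batch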
def main():
    args = parse.command_parser(parse.predictor_command_parser, test_command_parser)
    args.training_max_length = args.max_length
    # args.max_length = int(DATA_HANDLER.max_length/2)
    if args.number_of_batches == -1:
        args.number_of_batches = "*"

    dataset = DataHandler(dirname=args.dataset)
    predictor = parse.get_predictor(args)
    predictor.prepare_model(dataset)

    file = find_models(predictor, dataset, args)
    if args.number_of_batches == "*" and args.method not in ("UKNN", "MM", "POP"):
        output_file = save_file_name(predictor, dataset, args)
        last_tested_batch = get_last_tested_batch(output_file)
        # list(...) is needed in Python 3, where map() returns an iterator.
        batches = np.array(list(map(extract_number_of_epochs, file)))
        sorted_ids = np.argsort(batches)
        batches = batches[sorted_ids]
        file = np.array(file)[sorted_ids]  # find_models returns a plain list
        for i, f in enumerate(file):
            if batches[i] > last_tested_batch:
                evaluator = run_tests(predictor, f, dataset, args,
                                      get_full_recommendation_list=args.save_rank,
                                      k=args.nb_of_predictions)
                print('-------------------')
                print('(', i + 1, '/', len(file), ') results on ' + f)
                print_results(evaluator, args.metrics.split(','), plot=False,
                              file=output_file, n_batches=batches[i],
                              print_full_rank_comparison=args.save_rank)
    else:
        evaluator = run_tests(predictor, file, dataset, args,
                              get_full_recommendation_list=args.save_rank,
                              k=args.nb_of_predictions)
        print_results(evaluator, args.metrics.split(','),
                      file=save_file_name(predictor, dataset, args),
                      print_full_rank_comparison=args.save_rank)
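# For reference, a minimal sketch of what extract_number_of_epochs could look
# like (hypothetical: it assumes checkpoints embed the epoch count in their
# filename, e.g. "model_ne120.0.npz"; the real helper may parse another pattern):
import re

def extract_number_of_epochs_sketch(filename):
    # Pull the first "ne<number>" token out of the checkpoint filename.
    match = re.search(r'ne(\d+(?:\.\d+)?)', filename)
    return float(match.group(1)) if match else 0.0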
def main():
    args = parse.command_parser(parse.predictor_command_parser,
                                training_command_parser,
                                parse.early_stopping_command_parser)
    predictor = parse.get_predictor(args)
    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)
    predictor.prepare_model(dataset)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,  # forward slash works on all platforms
                    time_based_progress=args.time_based_progress,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=parse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
def main():
    sys.argv.extend(['--tshuffle',
                     '--load_last_model',
                     # '--extended_set',
                     '-d', 'datasets/',
                     '--save', 'Best',
                     '--progress', '200',
                     '--mpi', '1000.0',
                     '--max_iter', '6000.0',
                     '--max_time', '28800.0',
                     '--min_iter', '100.0',
                     '--es_m', 'StopAfterN',
                     '--es_n', '3',
                     '-m', 'RNN',
                     '--r_t', 'GRU',
                     '--r_l', '100-50',
                     '--u_m', 'rmsprop',
                     '--rf'])

    # ####################################################
    # # for RNNCluster
    # sys.argv.extend(['--dir', 'RNNCluster_',
    #                  '--metrics', 'recall,cluster_recall,sps,cluster_sps,ignored_items,assr',
    #                  '--loss', 'BPR', '--clusters', '10'])
    # ####################################################
    # for RNNOneHot
    sys.argv.extend(['--dir', 'RNNOneHot_',
                     '--metrics', 'recall,sps',  # ,ndcg,item_coverage,user_coverage,blockbuster_share
                     '--loss', 'CCE'])
    # ####################################################
    # # for RNNMargin
    # sys.argv.extend(['--dir', 'RNNMargin_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'logit'])
    # ####################################################
    # # for RNNSampling
    # sys.argv.extend(['--dir', 'RNNSampling_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'BPR'])
    # ####################################################
    # # without MOVIES_FEATURES
    # sys.argv.extend(['--r_emb', '100'])
    # # with MOVIES_FEATURES
    sys.argv.extend(['--mf'])
    # ####################################################

    args = parse.command_parser(parse.predictor_command_parser,
                                training_command_parser,
                                early_stopping.early_stopping_command_parser)
    predictor = parse.get_predictor(args)
    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)
    if args.mf:
        predictor.load_movies_features(dirname=dataset.dirname)
    predictor.prepare_model(dataset)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    time_based_progress=args.time_based_progress,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=early_stopping.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
def main():
    args = cp.command_parser(training_command_parser, cp.predictor_command_parser)
    predictor = parse.get_predictor(args)
    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)

    # Log training to a timestamped TensorBoard directory.
    logdir = os.path.join(os.getcwd(), "logs",
                          datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

    predictor.prepare_networks(dataset.n_items)
    predictor.train(dataset,
                    tensorboard_callback=tensorboard_callback,
                    autosave=args.save,
                    save_dir=dataset.dirname + "/models/" + args.dir,
                    n_epoch=args.n_epoch,
                    validation_metrics=args.metrics.split(','))
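# Once training has run, the scalars and histograms written by the TensorBoard
# callback above can be inspected with the standard TensorBoard CLI:
#
#   tensorboard --logdir logs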
def main():
    args = parse.command_parser(parse.predictor_command_parser,
                                training_command_parser,
                                parse.early_stopping_command_parser)
    predictor = parse.get_predictor(args)
    dataset = DataHandler(dirname=args.dataset,
                          extended_training_set=args.extended_set,
                          shuffle_training=args.tshuffle)
    predictor.prepare_model(dataset)
    predictor.train(dataset,
                    save_dir=dataset.dirname + "models/" + args.dir,
                    time_based_progress=args.time_based_progress,
                    progress=num(args.progress),
                    autosave=args.save,
                    max_progress_interval=args.mpi,
                    max_iter=args.max_iter,
                    min_iterations=args.min_iter,
                    max_time=args.max_time,
                    early_stopping=parse.get_early_stopper(args),
                    load_last_model=args.load_last_model,
                    validation_metrics=args.metrics.split(','))
def fit(self, df_ratings=None, columns=['userId', 'itemId', 'rating'],
        verbose=False, **kwargs):
    self.columns = np.array(columns)

    # If the Surprise lib is the base package for fitting, df_ratings must be used.
    # Algorithms that use the Surprise lib: NMF, SVD, KNN, SVDpp
    if df_ratings is not None:
        self.df_ratings = df_ratings.copy()

    ###########################################
    # Convert utility matrix to df_ratings if a utility matrix is passed
    ###########################################

    if self.name in self.surprise_algorithms:  # Surprise-based recommenders
        from surprise import Dataset
        from surprise import Reader

        # A reader is still needed, but only the rating_scale param is required.
        # The Reader class is used to parse a file containing ratings.
        reader = Reader(rating_scale=(0.5, 5.0))

        # Separate the timestamp column
        if 'timestamp' in columns:
            self.df_timestamp = self.df_ratings['timestamp'].copy()
            self.df_ratings.drop(labels='timestamp', inplace=True, axis=1)

        # The columns must correspond to user id, item id and ratings (in that order).
        data = Dataset.load_from_df(
            self.df_ratings[self.columns[np.where(self.columns != 'timestamp')]],
            reader)

        # Create the trainset used by Surprise's prediction functions
        self.trainset = data.build_full_trainset()

        # Create the model
        if self.name == 'svd':
            from surprise import SVD
            # Number of factors in the matrix factorization
            if 'n_factors' in kwargs:
                self.n_factors = kwargs['n_factors']
            else:
                self.n_factors = 100
                if verbose:
                    print("Using default number of factors: {}".format(self.n_factors))
            # Number of epochs of stochastic gradient descent
            if 'n_epochs' in kwargs:
                self.n_epochs = kwargs['n_epochs']
            else:
                self.n_epochs = 20
                if verbose:
                    print("Using default number of epochs: {}".format(self.n_epochs))
            self.model = SVD(n_factors=self.n_factors, n_epochs=self.n_epochs,
                             verbose=verbose)

        elif self.name == 'nmf':
            from surprise import NMF
            # Number of factors in the matrix factorization
            if 'n_factors' in kwargs:
                self.n_factors = kwargs['n_factors']
            else:
                self.n_factors = 15
                if verbose:
                    print("Using default number of factors: {}".format(self.n_factors))
            # Number of epochs of stochastic gradient descent
            if 'n_epochs' in kwargs:
                self.n_epochs = kwargs['n_epochs']
            else:
                self.n_epochs = 50
                if verbose:
                    print("Using default number of epochs: {}".format(self.n_epochs))
            self.model = NMF(n_factors=self.n_factors, n_epochs=self.n_epochs,
                             verbose=verbose)

        elif self.name == 'knnbasic':
            from surprise import KNNBasic
            # Number of neighbours
            if 'k' in kwargs:
                self.k = kwargs['k']
            else:
                self.k = 40
                if verbose:
                    print("Using default k: {}".format(self.k))
            # Minimum number of neighbours
            if 'k_min' in kwargs:
                self.k_min = kwargs['k_min']
            else:
                self.k_min = 1
                if verbose:
                    print("Using default k_min: {}".format(self.k_min))
            self.model = KNNBasic(k=self.k, min_k=self.k_min, verbose=verbose)

        elif self.name == 'kmeans':
            from surprise import KNNWithMeans
            # Number of neighbours
            if 'k' in kwargs:
                self.k = kwargs['k']
            else:
                self.k = 40
                if verbose:
                    print("Using default k: {}".format(self.k))
            # Minimum number of neighbours
            if 'k_min' in kwargs:
                self.k_min = kwargs['k_min']
            else:
                self.k_min = 1
                if verbose:
                    print("Using default k_min: {}".format(self.k_min))
            self.model = KNNWithMeans(k=self.k, min_k=self.k_min, verbose=verbose)

        else:
            if verbose:
                print("Algorithm not configured: {}".format(self.name))
            return -1

        # Train the algorithm on the full trainset.
        # Note: Surprise renamed train() to fit() in v1.0.5, so fit() is used here.
        self.model.fit(self.trainset)
        return 0
    elif self.name in self.devooght_algorithms:
        # Preprocessing
        directory_path = os.path.join('.', 'Sequence_based_recommendation_files',
                                      self.name)
        preprocess.create_dirs(dirname=directory_path, verbose=verbose)
        data = preprocess.remove_rare_elements(data=df_ratings,
                                               min_user_activity=1,
                                               min_item_popularity=1,
                                               verbose=verbose)
        data = preprocess.save_index_mapping(data=data, dirname=directory_path,
                                             separator=',')
        train_set, val_set, test_set = preprocess.split_data(
            data=data,
            nb_val_users=0.1,   # val_size
            nb_test_users=0.1,  # test_size
            dirname=directory_path,
            verbose=verbose)
        preprocess.make_sequence_format(train_set=train_set,
                                        val_set=val_set,
                                        test_set=test_set,
                                        dirname=directory_path,
                                        verbose=verbose)
        preprocess.save_data_stats(data=data,
                                   train_set=train_set,
                                   val_set=val_set,
                                   test_set=test_set,
                                   dirname=directory_path,
                                   verbose=verbose)

        # Train the algorithm
        parser = parse.command_parser(parse.predictor_command_parser,
                                      train.training_command_parser,
                                      parse.early_stopping_command_parser)
        if self.name == 'fism':
            args = parser.parse_args([
                '--dir', os.path.join(directory_path, 'models'),
                '-d', directory_path,  # directory_path + '/'
                '-b', '20',            # Batch size: training examples in a single batch
                '--max_iter', '50',    # Maximum number of iterations (batches) to run
                '--progress', '10',    # How often progress information is printed
                '-m', self.name.upper(),  # Method
                # '-i', '-1',          # Number of batches (only on the test parser)
                '--loss', 'RMSE',
                '--save', 'Best',
            ])
            self.model = parse.get_predictor(args)
            dataset = handler.DataHandler(dirname=args.dataset,
                                          extended_training_set=args.extended_set,
                                          shuffle_training=args.tshuffle)
            self.model.prepare_model(dataset)
            self.metrics = self.model.train(
                dataset,
                save_dir=args.dir,
                time_based_progress=args.time_based_progress,
                progress=float(args.progress),
                autosave=args.save,
                max_progress_interval=args.mpi,
                max_iter=args.max_iter,
                min_iterations=args.min_iter,
                max_time=args.max_time,
                early_stopping=parse.get_early_stopper(args),
                load_last_model=args.load_last_model,
                validation_metrics=args.metrics.split(','))
        else:
            if verbose:
                print("Algorithm not configured: {}".format(self.name))
            return -1
        return 0

    else:  # self.name is in neither algorithm family
        if verbose:
            print("Invalid algorithm: {}".format(self.name))
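# Usage sketch (hypothetical: assumes a Recommender-style wrapper whose
# constructor sets self.name, self.surprise_algorithms and
# self.devooght_algorithms, and that exposes the fit() method above):
#
#   import pandas as pd
#   ratings = pd.DataFrame({'userId': [1, 1, 2, 2],
#                           'itemId': [10, 20, 10, 30],
#                           'rating': [4.0, 3.5, 5.0, 2.0]})
#
#   # Surprise branch
#   rec = Recommender(name='svd')  # hypothetical constructor
#   rec.fit(df_ratings=ratings, n_factors=50, n_epochs=30, verbose=True)
#
#   # Sequence-based branch: preprocesses, trains FISM, writes artifacts under
#   # ./Sequence_based_recommendation_files/fism/ and stores the validation
#   # metrics returned by train() in rec.metrics
#   rec = Recommender(name='fism')
#   rec.fit(df_ratings=ratings, verbose=True)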
def main():
    sys.argv.extend(['-d', 'datasets/',
                     '-k', '3',
                     '--save',
                     '-m', 'RNN',
                     '--r_t', 'GRU',
                     '--r_l', '100-50',
                     '--u_m', 'rmsprop',
                     '--rf'])

    # ####################################################
    # # for RNNCluster
    # sys.argv.extend(['--dir', 'RNNCluster_',
    #                  '--metrics', 'recall,sps,assr',
    #                  '--loss', 'BPR', '--clusters', '10'])
    # ####################################################
    # for RNNOneHot
    sys.argv.extend(['--dir', 'RNNOneHot_',
                     '--metrics', 'sps,item_coverage,user_coverage,recall',
                     '--loss', 'CCE'])
    # ####################################################
    # # for RNNMargin
    # sys.argv.extend(['--dir', 'RNNMargin_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'logit'])
    # ####################################################
    # # for RNNSampling
    # sys.argv.extend(['--dir', 'RNNSampling_',
    #                  '--metrics', 'recall,sps',
    #                  '--loss', 'BPR'])
    # ####################################################
    # # without MOVIES_FEATURES
    # sys.argv.extend(['--r_emb', '100'])
    # with MOVIES_FEATURES
    sys.argv.extend(['--mf'])
    # ####################################################

    args = parse.command_parser(parse.predictor_command_parser, test_command_parser)
    args.training_max_length = args.max_length
    # args.max_length = int(DATA_HANDLER.max_length/2)
    if args.number_of_batches == -1:
        args.number_of_batches = "*"

    dataset = DataHandler(dirname=args.dataset)
    predictor = parse.get_predictor(args)
    if args.mf:
        predictor.load_movies_features(dirname=dataset.dirname)
    predictor.prepare_model(dataset)

    file = find_models(predictor, dataset, args)
    if args.number_of_batches == "*" and args.method not in ("UKNN", "MM", "POP"):
        output_file = save_file_name(predictor, dataset, args)
        last_tested_batch = get_last_tested_batch(output_file)
        batches = np.array(list(map(extract_number_of_epochs, file)))
        sorted_ids = np.argsort(batches)
        batches = batches[sorted_ids]
        file = np.array(file)[sorted_ids]  # find_models returns a plain list
        for i, f in enumerate(file):
            if batches[i] > last_tested_batch:
                evaluator = run_tests(predictor, f, dataset, args,
                                      get_full_recommendation_list=args.save_rank,
                                      k=args.nb_of_predictions)
                print('-------------------')
                print('(', i + 1, '/', len(file), ') results on ' + f)
                print_results(evaluator, args.metrics.split(','), plot=False,
                              file=output_file, n_batches=batches[i],
                              print_full_rank_comparison=args.save_rank)
                break  # only evaluate the first untested checkpoint per run
    else:
        evaluator = run_tests(predictor, file, dataset, args,
                              get_full_recommendation_list=args.save_rank,
                              k=args.nb_of_predictions)
        print_results(evaluator, args.metrics.split(','),
                      file=save_file_name(predictor, dataset, args),
                      print_full_rank_comparison=args.save_rank)