def main():
    """Train the chosen network, test it, and report settings and timings.

    The hyper-parameters are fixed in the body and registered through
    ``set_parameters``. The user is asked which network to train (0, 1 or 2);
    any other answer falls back to the baseline. After preprocessing, training
    and testing, a summary is printed and the same summary is written to
    ``./training_details/<network>_<epochs>_<batch_size>_<lr>.txt``.

    Returns early (``None``) without training when Keras is not configured
    for the ``channels_last`` image data format.

    The print calls use the single-argument parenthesised form, which is
    valid both as a Python 2 print statement and as a Python 3 function call.
    """
    # Local import: only needed to make sure the report folder exists.
    import os

    batch_size = 32
    epochs = 15
    lr = 0.01
    decay = 1e-6
    vgg16_pretrained = True
    set_parameters(batch_size, epochs, lr, decay)

    network = input(
        "Choose model: 0 for baseline, 1 for enhanced_1(VGG16), 2 for enhanced_2(batch normalization) :"
    )
    # NOTE(review): under Python 2, input() evaluates the typed text, which is
    # how `network` becomes an int there. Where input() returns text instead,
    # convert it so the integer comparisons below can still match.
    if isinstance(network, str):
        try:
            network = int(network.strip())
        except ValueError:
            pass  # left as text; rejected below and replaced by the baseline

    if K.image_data_format() != 'channels_last':
        print("This program needs image_data_format to be channels_last please update ~/keras/keras.json")
        return
    if K.backend() != 'tensorflow':
        print("This program has been developed and tested on tensorflow backend. For best performance use tensorflow.")
        print("Execution will continue, but problems may occur")

    # NOTE(review): `t` is presumably the `time` module; time.clock() no
    # longer exists on Python 3.8+ - confirm the target interpreter.
    print("Preprocessing images...")
    program_start = t.clock()
    start = t.clock()
    [X, Y] = preprocess()
    preprocess_time = t.clock() - start

    print("Training Model...")
    start = t.clock()
    if network == 0:
        print('Using Baseline Network')
        model = train_baseline(X, Y)
    elif network == 1:
        print('Using VGG16 Network')
        model = train_vgg16(X, Y, vgg16_pretrained)
    elif network == 2:
        print('Using Baseline Network with Batch Normalization ')
        model = train_batch_normalized(X, Y)
    else:
        print('Invalid network, using baseline instead')
        network = 0
        model = train_baseline(X, Y)
    training_time = t.clock() - start

    print("Testing...")
    start = t.clock()
    accuracy = test_model(model)
    test_time = t.clock() - start
    total_time = t.clock() - program_start

    # divmod floors the minutes. Formatting total_time / 60 with {:.0f}
    # rounded instead, so e.g. 119 s was reported as "2 minutes 59 seconds".
    minutes, seconds = divmod(total_time, 60)

    # Build every report line exactly once so that the console output and the
    # saved file cannot drift apart.
    system_lines = [
        "System Details",
        "Backend = " + K.backend(),
        "Data format = " + K.image_data_format(),
    ]
    training_lines = [
        "Training Details",
        "Network = " + NETWORK_TYPE[network],
        "Batch size = {}".format(batch_size),
        "Number of Epochs = {}".format(epochs),
        "Learning rate = {}".format(lr),
        "Decay = {}".format(decay),
    ]
    accuracy_line = "Test accuracy = {:.2%}".format(accuracy)
    timing_lines = [
        "Timings",
        "Pre-processing Time = {:.2f} seconds".format(preprocess_time),
        "Training Time = {:.2f} seconds".format(training_time),
        "Time per epoch = {:.2f} seconds".format(training_time / float(epochs)),
        "Testing Time = {:.2f} seconds".format(test_time),
        "Total time of execution = {:.0f} minutes {:.2f} seconds".format(
            minutes, seconds),
    ]

    # Console layout: each section is preceded by one blank line.
    for section in (system_lines, training_lines, [accuracy_line],
                    timing_lines):
        print("\n" + "\n".join(section))

    # File layout: the accuracy line directly follows the training details,
    # sections are separated by one blank line, no trailing newline.
    report_text = "\n\n".join([
        "\n".join(system_lines),
        "\n".join(training_lines + [accuracy_line]),
        "\n".join(timing_lines),
    ])

    report_dir = './training_details/'
    f_name = str(NETWORK_TYPE[network] + '_' + str(epochs) + '_' +
                 str(batch_size) + '_' + str(lr) + '.txt')
    # Create the folder when missing, so a finished run does not end in an
    # IOError just because the report folder was never created.
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)
    # `with` closes the file even when the write fails.
    with open(report_dir + f_name, 'w') as w_file:
        w_file.write(report_text)
def settings():
    """Show the settings page, or store the submitted form on POST.

    For any method other than POST, ``settings.html`` is rendered with the
    ``get_parameters`` callable passed to the template. On POST, the form is
    converted to a plain dict, handed to ``set_parameters``, and
    ``index.html`` is rendered.
    """
    if request.method != 'POST':
        return render_template('settings.html', get_parameters=get_parameters)

    submitted_values = dict(request.form)
    set_parameters(submitted_values)
    return render_template('index.html')
# neg_count is read from the command-line flags; the learning rate is fixed.
neg_count = FLAGS.neg
learning_rate = 0.001

# Load the dataset named by the configuration.
config = Config()
dataset = Dataset(config.filename, limit=limit)

# Gather every keyword for set_parameters in one mapping. The popularity
# vector, the loss percentile and both thresholds come from the dataset; the
# remaining values are passed through unchanged.
parameter_kwargs = dict(
    normalized_popularity=dataset.normalized_popularity,
    loss_alpha=loss_alpha,
    loss_beta=loss_beta,
    loss_scale=loss_scale,
    loss_percentile=get_percentile(dataset.normalized_popularity, 45),
    metrics_alpha=metrics_alpha,
    metrics_beta=metrics_beta,
    metrics_gamma=metrics_gamma,
    metrics_scale=metrics_scale,
    metrics_percentile=metrics_percentile,
    loss_type=loss_type,
    k=k,
    k_trainable=k_trainable,
    low_popularity_threshold=dataset.thresholds[0],
    high_popularity_threshold=dataset.thresholds[1],
)
set_parameters(**parameter_kwargs)

# Copy the dataset sizes into the configuration before logging it.
config.item_count = dataset.item_count
config.user_count = dataset.user_count

config_banner = "\n\n%s\n\n" % config
tf.logging.info(config_banner)
def read_data_split_and_search_CMN(dataset_name):
    """Run the baseline searches and the CMN experiment on one dataset.

    In order, the function:

    1. loads the train / validation / test / test-negative URMs from the
       reader that matches ``dataset_name``;
    2. normalises the item popularity of the training URM to [0, 1] and
       registers it, with fixed loss/metric constants, via ``set_parameters``;
    3. checks that the data is implicit and that the splits are disjoint;
    4. saves the popularity plot and statistics;
    5. runs ``runParameterSearch_Collaborative`` for each baseline;
    6. runs a single CMN case with fixed hyper-parameters;
    7. prints the LaTeX tables for timings and results.

    A failure in a baseline search or in the CMN run is printed with its
    traceback and does not stop the remaining steps.

    Args:
        dataset_name: one of ``"citeulike"``, ``"epinions"``, ``"pinterest"``.

    Raises:
        ValueError: if ``dataset_name`` is not one of the supported names.
            This is checked before anything is imported or loaded;
            previously an unknown name only failed later, on the first use
            of the unbound ``dataset`` variable.
    """
    supported_datasets = ("citeulike", "epinions", "pinterest")
    if dataset_name not in supported_datasets:
        raise ValueError(
            "Unsupported dataset_name {!r}; expected one of: {}".format(
                dataset_name, ", ".join(supported_datasets)))

    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader
    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    reader_by_dataset = {
        "citeulike": CiteULikeReader,
        "epinions": EpinionsReader,
        "pinterest": PinterestICCVReader,
    }
    dataset = reader_by_dataset[dataset_name]()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Developer switches. test_mode skips the baseline searches and reports
    # only cutoff 5; limit keeps only the first 700 rows of every URM.
    test_mode = False
    limit = False

    if limit:
        p = 700
        URM_train = URM_train[:p, :]
        URM_validation = URM_validation[:p, :]
        URM_test = URM_test[:p, :]
        URM_test_negative = URM_test_negative[:p, :]

    # Debugging aid for user 3. An earlier run printed, for this user:
    #   is_relevant_current_cutoff:       [True True True False False]
    #   recommended_items_current_cutoff: [65 86 68 3671 1341]
    #   Warning! is_relevant_current_cutoff.sum()>1: 3
    #   relevant_items:                   [65 68 81 86]
    #   relevant_items_rating:            [1. 1. 1. 1.]
    # i.e. several relevant items for a single user.
    print('USER 3')
    print('test ', URM_test[3])
    print('train ', URM_train[3])
    print('valid ', URM_validation[3])
    print('neg ', URM_test_negative[3])

    # During execution an HR > 1 was observed. The reason is that it was
    # computed on the validation set, which has several preferred items per
    # user (not just one). In the end the HR will be less than or equal to
    # one, because it is computed on the test set.

    # Min-max normalise the item popularity of the training data.
    # NOTE(review): if every item has the same popularity, gap is 0 and the
    # division below divides by zero - confirm this cannot happen.
    popularity = get_popularity(URM_train)
    min_value = np.min(popularity)
    max_value = np.max(popularity)
    gap = max_value - min_value
    popularity = (popularity - min_value) / gap

    print('Luciano > min:', min_value)
    print('Luciano > max:', max_value)
    print('Luciano > normalized popularity:', popularity)

    set_parameters(popularity=popularity,
                   loss_alpha=200,
                   loss_beta=0.02,
                   loss_scale=1,
                   loss_percentile=get_percentile(popularity, 45),
                   metrics_alpha=100,
                   metrics_beta=0.03,
                   metrics_gamma=5,
                   metrics_scale=1 / 15,
                   metrics_percentile=0.45,
                   new_loss=False)

    # Create the output folder when missing; exist_ok avoids the race between
    # an existence check and the creation.
    os.makedirs(output_folder_path, exist_ok=True)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    # Alternatives used elsewhere: "WEIGHTED_HIT_RATE", "CUSTOM_HIT_RATE".
    metric_to_optimize = "HIT_RATE"
    print('metric_to_optimize:', metric_to_optimize)

    # Ensure IMPLICIT data and DISJOINT sets. Which groups of matrices are
    # checked against each other depends on the dataset.
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])
    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test_negative])
    else:
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_statistics")

    # The test evaluator and the results table always use the same cutoffs,
    # so they are chosen once here. Each consumer gets its own list copy.
    test_cutoffs = [5] if test_mode else [5, 10]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[5])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=list(test_cutoffs))

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # The searches run one after another. A multiprocessing.Pool.map over
    # this partial could presumably replace the loop - untested here.
    for recommender_class in collaborative_algorithm_list:
        try:
            if not test_mode:
                runParameterSearch_Collaborative_partial(recommender_class)
            else:
                print('skipping', recommender_class)
        except Exception as e:
            # Best effort: report the failure and go on with the next one.
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ###########################################################################
    # CMN
    #
    # Parameters from original paper:
    # {
    #     "batch_size": 128,
    #     "decay_rate": 0.9,
    #     "embed_size": 50,
    #     "filename": "data/pinterest.npz",
    #     "grad_clip": 5.0,
    #     "hops": 2,
    #     "item_count": "9916",
    #     "l2": 0.1,
    #     "learning_rate": 0.001,
    #     "logdir": "result/004/",
    #     "max_neighbors": 1586,
    #     "neg_count": 4,
    #     "optimizer": "rmsprop",
    #     "optimizer_params": "{'momentum': 0.9, 'decay': 0.9}",
    #     "pretrain": "pretrain/pinterest_e50.npz",
    #     "save_directory": "result/004/",
    #     "tol": 1e-05,
    #     "user_count": "55187"
    # }
    try:
        temp_file_folder = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        # The batch size is 128 for every dataset (256 was once tried for
        # pinterest); only the embedding size differs.
        embed_size_by_dataset = {
            "citeulike": 50,
            "epinions": 40,
            "pinterest": 50,
        }
        CMN_article_parameters["batch_size"] = 128
        CMN_article_parameters["embed_size"] = embed_size_by_dataset[dataset_name]

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            CMN_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=CMN_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=CMN_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        # Best effort: the tables below are still produced.
        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper,
                                                      str(e)))
        traceback.print_exc()

    # Count the rows whose indptr difference is at least 1, i.e. rows with at
    # least one stored entry (presumably CSR matrices - confirm).
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CMN_RecommenderWrapper],
        ICM_names_to_report_list=[],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["HIT_RATE", "NDCG"],
        cutoffs_to_report_list=list(test_cutoffs),
        ICM_names_to_report_list=[],
        other_algorithm_list=[CMN_RecommenderWrapper])