def main():
    batch_size = 32
    epochs = 15
    lr = 0.01
    decay = 1e-6
    vgg16_pretrained = True

    set_parameters(batch_size, epochs, lr, decay)

    network = input(
        "Choose model: 0 for baseline, 1 for enhanced_1 (VGG16), 2 for enhanced_2 (batch normalization): "
    )
    if K.image_data_format() != 'channels_last':
        print "This program needs image_data_format to be channels_last please update ~/keras/keras.json"
        return

    if K.backend() != 'tensorflow':
        print "This program has been developed and tested on tensorflow backend. For best performance use tensorflow."
        print "Execution will continue, but problems may occur"

    print "Preprocessing images..."
    program_start = t.clock()
    start = t.clock()
    X, Y = preprocess()
    preprocess_time = t.clock() - start

    print "Training Model..."
    start = t.clock()
    if network == 0:
        print 'Using Baseline Network'
        model = train_baseline(X, Y)
    elif network == 1:
        print 'Using VGG16 Network'
        model = train_vgg16(X, Y, vgg16_pretrained)
    elif network == 2:
        print 'Using Baseline Network with Batch Normalization'
        model = train_batch_normalized(X, Y)
    else:
        print 'Invalid network, using baseline instead'
        network = 0
        model = train_baseline(X, Y)

    training_time = t.clock() - start
    print "Testing..."
    start = t.clock()
    accuracy = test_model(model)
    test_time = t.clock() - start
    total_time = t.clock() - program_start

    print "\nSystem Details"
    print "Backend = " + K.backend()
    print "Data format = " + K.image_data_format()

    print "\nTraining Details"
    print "Network = " + NETWORK_TYPE[network]
    print "Batch size = {}".format(batch_size)
    print "Number of Epochs = {}".format(epochs)
    print "Learning rate = {}".format(lr)
    print "Decay = {}".format(decay)

    print "\nTest accuracy = {:.2%}".format(accuracy)

    print "\nTimings"
    print "Pre-processing Time = {:.2f} seconds".format(preprocess_time)
    print "Training Time = {:.2f} seconds".format(training_time)
    print "Time per epoch = {:.2f} seconds".format(training_time /
                                                   float(epochs))
    print "Testing Time =  {:.2f} seconds".format(test_time)
    print "Total time of execution = {:.0f} minutes {:.2f} seconds".format(
        total_time / 60, total_time % 60)

    f_name = '{}_{}_{}_{}.txt'.format(NETWORK_TYPE[network], epochs,
                                      batch_size, lr)
    w_file = open('./training_details/' + f_name, 'w')
    w_file.write("System Details")
    w_file.write("\nBackend = " + K.backend())
    w_file.write("\nData format = " + K.image_data_format())

    w_file.write("\n\nTraining Details")
    w_file.write("\nNetwork = " + NETWORK_TYPE[network])
    w_file.write("\nBatch size = {}".format(batch_size))
    w_file.write("\nNumber of Epochs = {}".format(epochs))
    w_file.write("\nLearning rate = {}".format(lr))
    w_file.write("\nDecay = {}".format(decay))
    w_file.write("\nTest accuracy = {:.2%}".format(accuracy))

    w_file.write("\n\nTimings")
    w_file.write(
        "\nPre-processing Time = {:.2f} seconds".format(preprocess_time))
    w_file.write("\nTraining Time = {:.2f} seconds".format(training_time))
    w_file.write("\nTime per epoch = {:.2f} seconds".format(training_time /
                                                            float(epochs)))
    w_file.write("\nTesting Time =  {:.2f} seconds".format(test_time))
    w_file.write(
        "\nTotal time of execution = {:.0f} minutes {:.2f} seconds".format(
            total_time / 60, total_time % 60))
    w_file.close()
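
All of the examples in this listing funnel their configuration through set_parameters. Below is a minimal sketch of what such a module-level parameter store could look like, consistent with both the positional call in main() above and the keyword calls in the later examples; the storage strategy and the get_parameters pairing are assumptions for illustration, not the actual implementation.

# Hypothetical sketch of a module-level parameter store; everything here
# other than the set_parameters/get_parameters names is an assumption.
_PARAMS = {}

def set_parameters(*args, **kwargs):
    # Positional calls like set_parameters(batch_size, epochs, lr, decay)
    # are mapped onto conventional names; keyword calls are stored as-is.
    _PARAMS.update(zip(('batch_size', 'epochs', 'lr', 'decay'), args))
    _PARAMS.update(kwargs)

def get_parameters():
    # Return a copy so callers cannot mutate the shared store.
    return dict(_PARAMS)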
Example no. 2
def settings():
    if request.method == 'POST':
        set_parameters(dict(request.form))
        return render_template('index.html')
    else:
        return render_template('settings.html', get_parameters=get_parameters)
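
Since settings() reads request.method and renders templates, it is presumably registered as a Flask view. A minimal wiring sketch follows; the app object, route path, and imports are assumptions not shown in the original excerpt (set_parameters and get_parameters would come from the project's own module).

# Hypothetical wiring for the view above; names are assumptions.
from flask import Flask, request, render_template

app = Flask(__name__)
app.add_url_rule('/settings', view_func=settings, methods=['GET', 'POST'])

if __name__ == '__main__':
    app.run(debug=True)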
Example no. 3
        neg_count = FLAGS.neg
        learning_rate = 0.001


        config = Config()

        dataset = Dataset(config.filename, limit=limit)
        set_parameters(
            normalized_popularity=dataset.normalized_popularity,
            loss_alpha=loss_alpha,
            loss_beta=loss_beta,
            loss_scale=loss_scale,
            loss_percentile=get_percentile(dataset.normalized_popularity, 45),
            metrics_alpha=metrics_alpha,
            metrics_beta=metrics_beta,
            metrics_gamma=metrics_gamma,
            metrics_scale=metrics_scale,
            metrics_percentile=metrics_percentile,
            loss_type=loss_type,
            k=k,
            k_trainable=k_trainable,
            low_popularity_threshold=dataset.thresholds[0],
            high_popularity_threshold=dataset.thresholds[1]
        )

        # -----------------------------------------------------------------------------

        config.item_count = dataset.item_count
        config.user_count = dataset.user_count
        tf.logging.info("\n\n%s\n\n" % config)
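
Both this fragment and the next example derive a popularity cutoff via get_percentile(values, 45). A minimal sketch consistent with those call sites, assuming it simply wraps NumPy (the real implementation may differ):

import numpy as np

def get_percentile(values, q):
    # Hypothetical implementation: the q-th percentile of the popularity
    # array, e.g. get_percentile(popularity, 45) for the 45th percentile.
    return float(np.percentile(np.asarray(values), q))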
Example no. 4
def read_data_split_and_search_CMN(dataset_name):
    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader

    if dataset_name == "citeulike":
        dataset = CiteULikeReader()

    elif dataset_name == "epinions":
        dataset = EpinionsReader()

    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    test_mode = False
    limit = False
    if limit:
        p = 700
        URM_train = URM_train[:p, :]
        URM_validation = URM_validation[:p, :]
        URM_test = URM_test[:p, :]
        URM_test_negative = URM_test_negative[:p, :]
        '''
        user: 3
        is_relevant_current_cutoff: [ True  True  True False False]
        recommended_items_current_cutoff: [  65   86   68 3671 1341]
        Warning! is_relevant_current_cutoff.sum()>1: 3
        relevant_items: [65 68 81 86]
        relevant_items_rating: [1. 1. 1. 1.]
        items_to_compute: 
        [  42   62   65   68   81   86  148  218  559  662  776  792 1164 1341
         1418 1491 1593 1603 1617 1697 2140 2251 2446 2517 2566 2643 2719 2769
         2771 3081 3133 3161 3188 3268 3409 3666 3671 3845 3864 3897 3984 4272
         4327 4329 4431 4519 4565 4568 4718 4812 4915 5096 5128 5137 5141 5184
         5217 5241 5371 5394 5415 5492 5521 5775 5798 5830 5831 5931 6005 6281
         6375 6558 6638 6644 6661 6705 6881 6898 6939 6970 7010 7018 7147 7224
         7327 7404 7453 7466 7475 7561 7764 8064 8102 8222 8368 8530 8957 9101
         9322 9368 9619 9782 9832]
        '''
        print('USER 3')

        print('test ', URM_test[3])
        print('train ', URM_train[3])
        print('valid ', URM_validation[3])
        print('neg ', URM_test_negative[3])

        # During execution an HR > 1 had been observed. The reason is that it
        # was being computed on the validation set (which, for each user, has
        # several preferred items, not just one).
        # In the end the HR will be less than or equal to one, because it is
        # computed on the test set.
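        # Illustration of the point above: on a test set where each user has
        # a single relevant item, per-user hits are 0 or 1, so
        # HR = (users with a hit) / (evaluated users) <= 1.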

    popularity = get_popularity(URM_train)

    min_value = np.min(popularity)
    max_value = np.max(popularity)
    gap = max_value - min_value

    popularity = (popularity - min_value) / gap
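    # Note: the min-max normalization above assumes gap > 0 (at least two
    # distinct popularity values); with gap == 0 it would divide by zero.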

    print('Luciano > min:', min_value)
    print('Luciano > max:', max_value)
    print('Luciano > normalized popularity:', popularity)

    set_parameters(popularity=popularity,
                   loss_alpha=200,
                   loss_beta=0.02,
                   loss_scale=1,
                   loss_percentile=get_percentile(popularity, 45),
                   metrics_alpha=100,
                   metrics_beta=0.03,
                   metrics_gamma=5,
                   metrics_scale=1 / 15,
                   metrics_percentile=0.45,
                   new_loss=False)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    # metric_to_optimize = "WEIGHTED_HIT_RATE"
    metric_to_optimize = "HIT_RATE"
    # metric_to_optimize = "CUSTOM_HIT_RATE"

    print('metric_to_optimize:', metric_to_optimize)

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])

    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test_negative])

    else:
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"], output_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[5])
    if not test_mode:
        evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                     URM_test_negative,
                                                     cutoff_list=[5, 10])
    else:
        evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                     URM_test_negative,
                                                     cutoff_list=[5])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    #
    # pool.close()
    # pool.join()

    for recommender_class in collaborative_algorithm_list:

        try:
            if not test_mode:
                runParameterSearch_Collaborative_partial(recommender_class)
            else:
                print('skipping', recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### CMN
    '''
    Parameters from original paper:
    {
      "batch_size": 128,
      "decay_rate": 0.9,
      "embed_size": 50,
      "filename": "data/pinterest.npz",
      "grad_clip": 5.0,
      "hops": 2,
      "item_count": "9916",
      "l2": 0.1,
      "learning_rate": 0.001,
      "logdir": "result/004/",
      "max_neighbors": 1586,
      "neg_count": 4,
      "optimizer": "rmsprop",
      "optimizer_params": "{'momentum': 0.9, 'decay': 0.9}",
      "pretrain": "pretrain/pinterest_e50.npz",
      "save_directory": "result/004/",
      "tol": 1e-05,
      "user_count": "55187"
    }
    '''
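    # The wrapper parameters below track the paper's values: learning_rate
    # 0.001 -> 1e-3, neg_count 4 -> neg_samples 4, l2 0.1 -> reg_l2_cmn 1e-1;
    # note that hops is 3 here versus 2 in the paper's configuration.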

    try:

        temp_file_folder = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        if dataset_name == "citeulike":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50

        elif dataset_name == "epinions":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 40

        elif dataset_name == "pinterest":
            CMN_article_parameters["batch_size"] = 128
            # CMN_article_parameters["batch_size"] = 256
            CMN_article_parameters["embed_size"] = 50

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            CMN_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=CMN_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=CMN_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:

        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper,
                                                      str(e)))
        traceback.print_exc()

    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CMN_RecommenderWrapper],
        ICM_names_to_report_list=[],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)
    if not test_mode:
        print_results_latex_table(
            result_folder_path=output_folder_path,
            results_file_prefix_name=ALGORITHM_NAME,
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=[5, 10],
            ICM_names_to_report_list=[],
            other_algorithm_list=[CMN_RecommenderWrapper])
    else:
        print_results_latex_table(
            result_folder_path=output_folder_path,
            results_file_prefix_name=ALGORITHM_NAME,
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=[5],
            ICM_names_to_report_list=[],
            other_algorithm_list=[CMN_RecommenderWrapper])
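
A usage sketch for the function above; the entry-point guard is an assumption, and the valid dataset names are taken from the reader branches at the top of the function.

if __name__ == '__main__':
    # Any of "citeulike", "epinions", "pinterest" per the branches above.
    read_data_split_and_search_CMN("citeulike")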