Ejemplo n.º 1
0
def main(args):
    """Evaluate every best-tuned model (selected by NDCG) on the test split.

    Reads the tables directory from the global config, merges validation
    interactions into training, runs each model once, and saves one CSV.
    """
    # Resolve the output directory for result tables from the global config.
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best_settings = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Fold validation interactions into the training matrix for the final fit.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    records = []
    for _, setting in best_settings.iterrows():
        tic = timeit.default_timer()
        setting = setting.to_dict()
        setting['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP']
        setting['topK'] = topK
        outcome = execute(R_train,
                          R_test,
                          setting,
                          models[setting['model']],
                          gpu_on=args.gpu)
        print('Time: ', timeit.default_timer() - tic)
        records.append(outcome)

    save_dataframe_csv(pd.concat(records), table_path, args.name)
Ejemplo n.º 2
0
def main(args):
    """Prepare binarized keyphrase matrices and launch the critiquing run."""
    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray()

    # Binarize user-keyphrase frequencies to presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).T.toarray()

    # For each item, keep only its 10 most frequent keyphrases as 0/1 flags.
    num_items, num_keyphrases = R_train_item_keyphrase.shape
    for item_idx in range(num_items):
        frequencies = R_train_item_keyphrase[item_idx]
        present = frequencies.nonzero()[0]
        top10 = present[np.argsort(-frequencies[present])[:10]]
        indicator = np.zeros(num_keyphrases)
        indicator[top10] = 1
        R_train_item_keyphrase[item_idx] = indicator

    # Back to sparse, restoring the original (keyphrase x item) orientation.
    R_train_item_keyphrase = sparse.csr_matrix(R_train_item_keyphrase).T

    params = dict()

    critiquing(R_train,
               R_train_keyphrase,
               R_train_item_keyphrase,
               params,
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path,
               critiquing_function=args.critiquing_function)
Ejemplo n.º 3
0
def main(args):
    """Evaluate the best tuned models (selected by MAP@10) on the test split."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best = find_best_hyperparameters(table_path + args.tuning_result_path,
                                     'MAP@10')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Validation interactions are folded into training for the final run.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    collected = []
    for _, entry in best.iterrows():
        t0 = timeit.default_timer()
        entry = entry.to_dict()
        entry['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP']
        entry['topK'] = topK
        outcome = execute(R_train, R_test, entry, models[entry['model']])
        print('Time: ', timeit.default_timer() - t0)
        collected.append(outcome)

    save_dataframe_csv(pd.concat(collected), table_path, args.name)
Ejemplo n.º 4
0
def main(args):
    """Evaluate the best tuned models (by NDCG) via `general` and save a CSV."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    tuned = find_best_hyperparameters(table_path + args.tuning_result_path,
                                      'NDCG')

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)

    # Fold validation interactions into training for the final evaluation.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]

    outcomes = []
    for _, config in tuned.iterrows():
        begin = timeit.default_timer()
        config = config.to_dict()
        config['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP']
        config['topK'] = topK
        outcome = general(R_train,
                          R_test,
                          config,
                          models[config['model']],
                          measure=config['similarity'],
                          gpu_on=args.gpu,
                          model_folder=args.model_folder)
        print('Time: ', timeit.default_timer() - begin)
        outcomes.append(outcome)

    save_dataframe_csv(pd.concat(outcomes), table_path, args.save_path)
def main(args):
    """Run the attention analysis; item meta-data is optional.

    Loads the rating splits, the user/item index map and (when present) the
    item-name table, then delegates to `attention`.
    """
    settings_df = load_dataframe_csv(args.tab_path + args.setting_dir)

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)

    index_map = np.load(args.data_dir + args.index)

    item_names = None

    try:
        item_names = load_dataframe_csv(args.data_dir + args.names,
                                        delimiter="::",
                                        names=['ItemID', 'Name', 'Category'])
    except Exception:
        # Meta-data is best-effort: continue with item_names=None. The old
        # bare `except:` also swallowed SystemExit/KeyboardInterrupt; narrow
        # it to Exception so interpreter-exit signals propagate.
        print("Meta-data does not exist")

    attention(R_train,
              R_valid,
              R_test,
              index_map,
              item_names,
              args.tex_path,
              args.fig_path,
              settings_df,
              args.template_path,
              preference_analysis=args.preference_analysis,
              case_study=args.case_study,
              gpu_on=True)
Ejemplo n.º 6
0
def main(args):
    """Run the critiquing simulation with the best tuned hyper-parameters.

    Loads rating and keyphrase matrices, looks up the best hyper-parameters
    (by NDCG) for the chosen model, runs the critiquing loop, and saves the
    resulting table as CSV.

    Fix: `table_path` was reloaded from the global config a second time just
    before saving even though the value in scope was unchanged; the redundant
    re-read was removed.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    # NOTE(review): the yelp item-keyphrase matrix is apparently stored
    # transposed relative to the other datasets — confirm against the
    # data-preparation scripts.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    start_time = time.time()

    results = critiquing(
        matrix_Train=R_train,
        matrix_Test=R_test,
        keyphrase_freq=R_train_keyphrase,
        item_keyphrase_freq=R_train_item_keyphrase,
        num_users_sampled=args.num_users_sampled,
        num_items_sampled=args.num_items_sampled,
        max_iteration_threshold=args.max_iteration_threshold,
        dataset_name=args.dataset_name,
        model=models[args.model],
        parameters_row=parameters_row,
        critiquing_model_name=args.critiquing_model_name,
        keyphrase_selection_method=args.keyphrase_selection_method,
        topk=args.topk,
        lamb=args.lamb)

    print("Final Time Elapsed: {}".format(inhour(time.time() - start_time)))

    # Reuse the table_path read above instead of re-reading the config file.
    save_dataframe_csv(results, table_path, args.save_path)
Ejemplo n.º 7
0
def main(args):
    """Hyper-parameter search for one model on the chosen dataset split."""
    progress = WorkSplitter()
    progress.section("Tune Parameters")
    search_space = load_yaml(args.grid)
    # Resolve the model name in the grid to its implementation.
    search_space['models'] = {search_space['models']: models[search_space['models']]}
    train = load_numpy(path=args.path, name=args.dataset + args.train)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    hyper_parameter_tuning(train, valid, search_space,
                           unif_train=unif_train,
                           save_path=args.dataset + args.name,
                           gpu_on=args.gpu,
                           seed=args.seed,
                           way=args.way,
                           dataset=args.dataset)
Ejemplo n.º 8
0
def main(args):
    """Tune hyper-parameters for the model named in the YAML grid file."""
    grid = load_yaml(args.grid)
    # Resolve the model name in the grid to its implementation.
    grid['models'] = {grid['models']: models[grid['models']]}
    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    hyper_parameter_tuning(R_train, R_valid, grid,
                           save_path=args.name,
                           measure=grid['similarity'],
                           gpu_on=args.gpu)
Ejemplo n.º 9
0
def main(args):
    """Plot NDCG convergence over training epochs for the best tuned models."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best = find_best_hyperparameters(table_path + args.param, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)

    curves = converge(R_train, R_valid, best, table_path, args.name,
                      epochs=500, gpu_on=args.gpu)

    show_training_progress(curves, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg")
Ejemplo n.º 10
0
def main(args):
    """Run multi-step critiquing with pinned hyper-parameters and save a CSV.

    Fixes: the global config was read twice (`load_yaml` before and after the
    critiquing call) for the same unchanged value — the duplicate read was
    removed; trailing whitespace and non-PEP8 keyword spacing cleaned up.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Hyper-parameters are pinned here instead of being read from the tuning
    # tables (see the tuning pipeline for how they were obtained).
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200
    }

    keyphrases_names = load_dataframe_csv(path=args.data_dir, name="Keyphrases.csv")['Phrases'].tolist()

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    # Reuse the table_path read above instead of re-reading the config file.
    save_dataframe_csv(results, table_path, args.save_path)
Ejemplo n.º 11
0
def main(args):
    """Train a neighbourhood-style model, predict top-k items per user, and
    optionally evaluate the predictions against a validation split.

    Expected args: path, train, valid, validation, model, lambda_diversity,
    lambda_serendipity, k, topk.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Train File Name: {}".format(args.train))
    if args.validation:
        print("Valid File Name: {}".format(args.valid))
    print("Algorithm: {}".format(args.model))
    print("Lambda Diversity: {}".format(args.lambda_diversity))
    print("Lambda Serendipity: {}".format(args.lambda_serendipity))
    print("Nearest Neighbor Number: {}".format(args.k))
    print("Evaluation Ranking Topk: {}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    R_train = load_numpy(path=args.path, name=args.train)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {}".format(R_train.shape))

    progress.section("Train")
    # `models` maps the algorithm name to a class; instantiate and fit it.
    model = models[args.model]()
    model.train(R_train)

    progress.section("Predict")
    # Score all user-item pairs; diversity/serendipity weights re-rank scores.
    prediction_score = model.predict(
        R_train,
        k=args.k,
        lambda_diversity=args.lambda_diversity,
        lambda_serendipity=args.lambda_serendipity)

    # Convert raw scores into top-K item lists, excluding training items.
    prediction = predict(prediction_score=prediction_score,
                         topK=args.topk,
                         matrix_Train=R_train)

    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()

        metric_names = [
            'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
        ]

        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{}:{}".format(metric, result[metric]))
        print("Elapsed: {}".format(inhour(time.time() - start_time)))
def main(args):
    """Run the critiquing simulation with hard-coded hyper-parameters and a
    hard-coded output directory, then save the result table as CSV.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train User Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_item_keyphrase.shape))

    # Hyper-parameters are pinned here; the table-based lookup is disabled.
    # table_path = load_yaml('config/global.yml', key='path')['tables']
    # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG')
    # parameters_row = parameters.loc[parameters['model'] == args.model]
    parameters_row = pd.DataFrame({'iter': [4], 'lambda': [80], 'rank': [200]})

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name)

    # NOTE(review): hard-coded machine-specific output directory; the
    # config-based path below is commented out. Confirm before running on
    # another machine.
    # table_path = load_yaml('config/global.yml', key='path')['tables']
    table_path = '/home/shuyang/data4/LatentLinearCritiquingforConvRecSys/'
    save_dataframe_csv(results, table_path, args.save_path)
Ejemplo n.º 13
0
def main(args):
    """Gain analysis for RestrictedBatchSampleMF over user/item segments."""
    progress = WorkSplitter()

    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # One evaluation per population segment (None = full population).
    segments = [None, 'head_users', 'tail_users', 'head_items', 'tail_items']
    df = pd.DataFrame({
        'model': ['RestrictedBatchSampleMF'] * len(segments),
        'way': segments
    })

    progress.subsection("Gain Analysis")
    frame = []
    for _, spec in df.iterrows():
        spec = spec.to_dict()
        spec['metric'] = ['NLL', 'AUC']
        spec['rank'] = 10
        frame.append(execute(test, spec, folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
Ejemplo n.º 14
0
def main(args):
    """Reproduce the MF-family results on the test split and save the table."""
    progress = WorkSplitter()

    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # (model, way) pairs to reproduce, in evaluation order.
    specs = [("BiasedMF", None), ("BiasedMF", "unif"), ("BiasedMF", "combine"),
             ("PropensityMF", None),
             ("InitFeatureEmbedMF", "user"), ("InitFeatureEmbedMF", "item"),
             ("InitFeatureEmbedMF", "both"),
             ("AlterFeatureEmbedMF", None), ("ConcatFeatureEmbedMF", None),
             ("CausalSampleMF", None), ("UnionSampleMF", None),
             ("WRSampleMF", None), ("BatchSampleMF", None),
             ("BridgeLabelMF", None), ("RefineLabelMF", None)]
    df = pd.DataFrame({'model': [m for m, _ in specs],
                       'way': [w for _, w in specs]})

    progress.subsection("Reproduce")
    frame = []
    for _, spec in df.iterrows():
        spec = spec.to_dict()
        spec['metric'] = ['NLL', 'AUC']
        spec['rank'] = 10
        frame.append(execute(test, spec, folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
Ejemplo n.º 15
0
def main(args):
    """Reproduce the autoencoder-family results on the test split."""
    progress = WorkSplitter()

    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # (model, way) pairs to reproduce, in evaluation order.
    specs = [('AutoRec', None), ('AutoRec', 'unif'), ('AutoRec', 'combine'),
             ('InitFeatureEmbedAE', 'user'), ('InitFeatureEmbedAE', 'item'),
             ('InitFeatureEmbedAE', 'both'),
             ('AlterFeatureEmbedAE', None), ('ConcatFeatureEmbedAE', None),
             ('UnionSampleAE', None), ('WRSampleAE', None),
             ('BatchSampleAE', None), ('BridgeLabelAE', None),
             ('RefineLabelAE', None),
             ('DeepAutoRec', None), ('DeepAutoRec', 'unif'),
             ('SoftLabelAE', None), ('HintAE', None)]
    df = pd.DataFrame({'model': [m for m, _ in specs],
                       'way': [w for _, w in specs]})

    progress.subsection("Reproduce")
    frame = []
    for _, spec in df.iterrows():
        spec = spec.to_dict()
        spec['metric'] = ['NLL', 'AUC']
        spec['rank'] = 200
        frame.append(execute(test, spec, folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
Ejemplo n.º 16
0
def main(args):
    """Per-user-category evaluation of the best tuned models."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Validation interactions join the training matrix for the final models.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    metric = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP']

    usercategory(R_train,
                 R_test,
                 best,
                 topK,
                 metric,
                 args.problem,
                 args.model_folder,
                 gpu_on=args.gpu)
Ejemplo n.º 17
0
def main(args):
    """Personalization analysis of the best tuned models on the test split."""
    table_path = load_yaml('config/global.yml', key='path')['tables']

    best = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)

    # Validation interactions join the training matrix for the final models.
    R_train = R_train + R_valid

    topK = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

    personalization(R_train,
                    R_test,
                    best,
                    topK,
                    args.problem,
                    args.model_folder,
                    gpu_on=args.gpu)
def main(args):
    """Produce final explanations using the tuned explanation model."""
    params = {'tuning_result_path': args.tuning_result_path}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set)
    R_test_keyphrase = load_numpy(path=args.data_dir,
                                  name=args.test_keyphrase_set)
    # Binarize keyphrase frequencies to presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_test_keyphrase[R_test_keyphrase != 0] = 1

    # NOTE(review): only the train keyphrase matrix is densified — the test
    # matrix stays sparse; confirm `general` expects this asymmetry.
    general(R_train,
            R_test,
            R_train_keyphrase.todense(),
            R_test_keyphrase,
            params,
            save_path=args.save_path,
            final_explanation=args.final_explanation)
def main(args):
    """Tune the explanation model on the validation split."""
    config = load_yaml(args.parameters)

    # Resolve the configured model name to its implementation.
    config['models'] = {config['models']: models[config['models']]}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set)
    R_valid_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.valid_keyphrase_set)
    # Binarize keyphrase frequencies to presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1

    hyper_parameter_tuning(R_train,
                           R_valid,
                           R_train_keyphrase.todense(),
                           R_valid_keyphrase,
                           config,
                           save_path=args.save_path,
                           tune_explanation=args.tune_explanation)
Ejemplo n.º 20
0
def cml_normalized(matrix_train, time_stamp_matrix=None, embeded_matrix=np.empty((0)),
                   iteration=100, lam=80, rank=200, seed=1, **unused):
    """Train a normalized collaborative metric learning model.

    Returns (RQ, Y, None): user factors, item factors (transposed), no bias.

    Fix: the `time_stamp_matrix` argument was previously ignored — it was
    always overwritten by a hard-coded load from 'datax/Rtime.npz'. The
    hard-coded load is now only a fallback for callers that pass None, which
    preserves the old default behavior.
    """
    progress = WorkSplitter()
    matrix_input = matrix_train

    if time_stamp_matrix is None:
        # Fallback: load the timestamp matrix from the legacy default location.
        from utils.io import load_numpy
        time_stamp_matrix = load_numpy(path='datax/', name='Rtime.npz')
    # Interaction order is derived from timestamps of observed entries only.
    orders = get_orders(time_stamp_matrix.multiply(matrix_train))

    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = NormalizedCollaborativeMetricLearning(num_users=m, num_items=n, embed_dim=rank, cov_loss_weight=lam)

    model.train_model(matrix_input, orders, iteration)

    RQ = model.get_RQ()
    Y = model.get_Y().T
    # Clear the TF graph so repeated calls do not accumulate state.
    tf.reset_default_graph()
    return RQ, Y, None
Ejemplo n.º 21
0
def main(args):
    """Train a latent-factor model (item- or user-based), predict top-k items,
    and optionally evaluate against a validation split.

    Expected args include: data_dir, train_set, valid_set, validation, model,
    item, normalize, alpha, rank, mode_dim, key_dim, batch_size, optimizer,
    learning_rate, lamb, iteration, epoch, corruption, root, topk, shape,
    seed, gpu, sim_measure.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.data_dir))
    print("Train File Name: {0}".format(args.train_set))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid_set))
    print("Algorithm: {0}".format(args.model))
    # `item` selects whether to factorize R (item-based) or R.T (user-based).
    if args.item == True:
        mode = "Item-based"
    else:
        mode = "User-based"
    print("Normalize: {0}".format(args.normalize))
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Mode Dimension: {0}".format(args.mode_dim))
    print("Key Dimension: {0}".format(args.key_dim))
    print("Batch Size: {0}".format(args.batch_size))
    print("Optimizer: {0}".format(args.optimizer))
    print("Learning Rate: {0}".format(args.learning_rate))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iteration))
    print("Epoch: {0}".format(args.epoch))
    print("Corruption: {0}".format(args.corruption))
    print("Root: {0}".format(args.root))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        # Pre-built sparse matrix on disk.
        R_train = load_numpy(path=args.data_dir, name=args.train_set)
    else:
        # Raw CSV: build the matrix with the explicitly given shape.
        # R_train = load_pandas(path=args.data_dir, name=args.train_set, shape=args.shape)
        R_train = load_csv(path=args.data_dir,
                           name=args.train_set,
                           shape=args.shape)

    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User
    if args.item == True:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        # NOTE(review): unlike the item branch, this call omits
        # `epoch=args.epoch` — confirm whether that is intentional.
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    # Factor persistence is disabled in this variant.
    # np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    # np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    # if Bias is not None:
    #     np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=args.topk,
                         matrix_Train=R_train,
                         measure=args.sim_measure,
                         gpu=args.gpu)
    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()

        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
Ejemplo n.º 22
0
def main(args):
    """Hyper-parameter tuning driven entirely by a YAML parameter file."""
    config = load_yaml(args.parameters)
    # Resolve the configured model name to its implementation.
    config['models'] = {config['models']: models[config['models']]}
    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    hyper_parameter_tuning(R_train, R_valid, config, save_path=args.save_path)
Ejemplo n.º 23
0
def main(args):
    """Train a latent-factor model, save its factors, predict top-k items, and
    optionally evaluate against a validation split.

    Fix: two Python-2 `print "..."` statements (after data loading and after
    metric computation) were converted to print() calls — they are a
    SyntaxError under Python 3, while the printed output is unchanged. Dead
    commented-out code (load_pandas / save_mxnet) was removed.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.train))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid))
    print("Algorithm: {0}".format(args.model))
    # `item` selects whether to factorize R (item-based) or R.T (user-based).
    if args.item == True:
        mode = "Item-based"
    else:
        mode = "User-based"
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iter))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        # Pre-built sparse matrix on disk.
        R_train = load_numpy(path=args.path, name=args.train)
    else:
        # Raw CSV: build the matrix with the explicitly given shape.
        R_train = load_csv(path=args.path, name=args.train, shape=args.shape)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User
    if args.item == True:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    # Persist the learned factors (and bias, when the model produces one).
    np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    if Bias is not None:
        np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=args.topk,
                         matrix_Train=R_train,
                         measure=args.sim_measure,
                         gpu=True)
    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()

        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
Ejemplo n.º 24
0
def _print_hyperparameters(args):
    """Echo the run configuration so experiment logs are self-describing."""
    print("Data Directory: {}".format(args.data_dir))
    print("Algorithm: {}".format(args.model))
    print("Optimizer: {}".format(args.optimizer))
    print("Corruption Rate: {}".format(args.corruption))
    print("Learning Rate: {}".format(args.learning_rate))
    print("Epoch: {}".format(args.epoch))
    print("Lambda L2: {}".format(args.lamb_l2))
    print("Lambda Keyphrase: {}".format(args.lamb_keyphrase))
    print("Lambda Latent: {}".format(args.lamb_latent))
    print("Lambda Rating: {}".format(args.lamb_rating))
    print("Beta: {}".format(args.beta))
    print("Rank: {}".format(args.rank))
    print("Train Batch Size: {}".format(args.train_batch_size))
    print("Predict Batch Size: {}".format(args.predict_batch_size))
    print("Evaluation Ranking Topk: {}".format(args.topk))
    print("Validation Enabled: {}".format(args.enable_validation))


def _load_matrices(args):
    """Load the train interaction/keyphrase matrices plus the held-out split.

    The held-out matrices come from the validation split when
    args.enable_validation is set, otherwise from the test split.
    Returns (R_train, R_train_keyphrase, R_valid, R_valid_keyphrase).
    """
    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Keyphrase U-S Dimensions: {}".format(R_train_keyphrase.shape))

    if args.enable_validation:
        heldout_name = args.valid_set
        heldout_keyphrase_name = args.valid_keyphrase_set
    else:
        heldout_name = args.test_set
        heldout_keyphrase_name = args.test_keyphrase_set
    R_valid = load_numpy(path=args.data_dir, name=heldout_name)
    R_valid_keyphrase = load_numpy(path=args.data_dir,
                                   name=heldout_keyphrase_name)
    return R_train, R_train_keyphrase, R_valid, R_valid_keyphrase


def main(args):
    """Train a keyphrase-aware recommender, predict, and optionally evaluate
    both rating and keyphrase rankings on the held-out split."""
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    _print_hyperparameters(args)

    # Load Data
    progress.section("Load Data")
    load_tic = time.time()
    R_train, R_train_keyphrase, R_valid, R_valid_keyphrase = _load_matrices(args)
    print("Elapsed: {}".format(inhour(time.time() - load_tic)))

    progress.section("Preprocess Keyphrase Frequency")
    binarize_tic = time.time()
    # Collapse keyphrase frequencies to binary presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1
    print("Elapsed: {}".format(inhour(time.time() - binarize_tic)))

    progress.section("Train")
    train_tic = time.time()
    model = models[args.model](matrix_train=R_train,
                               epoch=args.epoch,
                               lamb_l2=args.lamb_l2,
                               lamb_keyphrase=args.lamb_keyphrase,
                               lamb_latent=args.lamb_latent,
                               lamb_rating=args.lamb_rating,
                               beta=args.beta,
                               learning_rate=args.learning_rate,
                               rank=args.rank,
                               corruption=args.corruption,
                               optimizer=args.optimizer,
                               matrix_train_keyphrase=R_train_keyphrase)
    print("Elapsed: {}".format(inhour(time.time() - train_tic)))

    progress.section("Predict")
    predict_tic = time.time()
    rating_score, keyphrase_score = model.predict(R_train.todense())
    prediction = predict(rating_score, args.topk, matrix_Train=R_train)
    print("Elapsed: {}".format(inhour(time.time() - predict_tic)))

    if args.enable_evaluation:
        progress.section("Create Metrics")
        eval_tic = time.time()

        metric_names = [
            'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
        ]
        rating_result = evaluate(prediction, R_valid, metric_names,
                                 [args.topk])

        print("-")
        for name in rating_result:
            print("{}:{}".format(name, rating_result[name]))

        # Some models do not produce keyphrase explanations at all.
        if keyphrase_score is not None:
            keyphrase_prediction = predict_keyphrase(keyphrase_score,
                                                     args.topk)
            keyphrase_result = evaluate(keyphrase_prediction,
                                        sparse.csr_matrix(R_valid_keyphrase),
                                        metric_names, [args.topk])

            print("-")
            for name in keyphrase_result:
                print("{}:{}".format(name, keyphrase_result[name]))

        print("Elapsed: {}".format(inhour(time.time() - eval_tic)))

    # Release the TF session/graph so repeated runs start clean.
    model.sess.close()
    tf.reset_default_graph()
Ejemplo n.º 25
0
def main(args):
    """Train a recommender on biased + uniform data, persist its latent
    factors, and report NLL/AUC on the validation split."""
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    load_tic = time.time()

    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - load_tic)))

    print("Train U-I Dimensions: {0}".format(train.shape))

    # Train Model
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path,
                            name=args.dataset + args.unif_train)
    RQ, Y, uBias, iBias = models[args.model](
        train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
        iteration=args.iter, rank=args.rank, gpu_on=args.gpu, lam=args.lamb,
        lam2=args.lamb2, seed=args.seed, batch_size=args.batch_size,
        way=args.way, confidence=args.confidence, step=args.step)

    save_path = 'latent/' + args.dataset
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Without a "way" the factors are saved unprefixed; otherwise the way
    # name is prepended to each file so different variants can coexist.
    if args.way is None:
        stem = save_path + '/'
    else:
        stem = save_path + '/' + args.way + '_'
    np.save(stem + 'U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save(stem + 'V_{0}_{1}'.format(args.model, args.rank), Y)
    if uBias is not None:
        np.save(stem + 'uB_{0}_{1}'.format(args.model, args.rank), uBias)
        np.save(stem + 'iB_{0}_{1}'.format(args.model, args.rank), iBias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=valid,
                         ubias=uBias, ibias=iBias, gpu=args.gpu)

    progress.section("Evaluation")
    eval_tic = time.time()
    result = evaluate(prediction, valid, ['NLL', 'AUC'], gpu=args.gpu)

    print("----Final Result----")
    for metric in result:
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - eval_tic)))
Ejemplo n.º 26
0
def _save_latent(save_path, way, tag, model_name, rank, matrix):
    """Persist one latent factor as `<save_path>/[<way>_]<tag>_<model>_<rank>.npy`."""
    prefix = '' if way is None else way + '_'
    np.save(save_path + '/' + prefix + '{0}_{1}_{2}'.format(tag, model_name, rank),
            matrix)


def _save_factors(save_path, way, model_name, rank, factors, biases):
    """Save each (tag, matrix) factor; biases only when the model has them.

    The bias gate mirrors the original logic: when the first bias is None
    the model was trained bias-free and none of the biases are written.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    for tag, matrix in factors:
        _save_latent(save_path, way, tag, model_name, rank, matrix)
    if biases[0][1] is not None:
        for tag, matrix in biases:
            _save_latent(save_path, way, tag, model_name, rank, matrix)


def _predict_and_report(progress, RQ, matrix_V, bias, valid, gpu):
    """Score the validation matrix and print NLL/AUC metrics."""
    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=matrix_V,
                         matrix_Valid=valid,
                         bias=bias,
                         gpu=gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=gpu)

    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))


def main(args):
    """Train an autoencoder recommender (deep or shallow variant), persist
    its latent factors, and evaluate NLL/AUC on the validation split.

    Previously the save/predict/evaluate code was copy-pasted four ways
    (way-None x model-branch); it is now factored into the helpers above
    with identical file names, bias gating, and ordering.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset +
                                                args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()

    train = load_numpy(path=args.path, name=args.dataset + args.train)

    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(train.shape))

    # Train Model
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)

    unif_train = load_numpy(path=args.path,
                            name=args.dataset + args.unif_train)

    save_path = 'latent/' + args.dataset

    if args.model in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        # Deep variants return two extra layers (Z, K) plus their biases,
        # and score the validation data with K as the item factor.
        RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = models[args.model](
            train,
            valid,
            dataset=args.dataset,
            matrix_unif_train=unif_train,
            iteration=args.iter,
            rank=args.rank,
            rank2=args.rank2,
            gpu_on=args.gpu,
            lam=args.lamb,
            seed=args.seed,
            batch_size=args.batch_size,
            way=args.way,
            confidence=args.confidence,
            step=args.step,
            tau=args.tau)

        _save_factors(save_path, args.way, args.model, args.rank,
                      factors=[('U', RQ), ('Y', Y), ('X', X), ('Z', Z),
                               ('K', K)],
                      biases=[('xB', xBias), ('yB', yBias), ('zB', zBias),
                              ('kB', kBias)])

        _predict_and_report(progress, RQ, K.T, yBias, valid, args.gpu)
    else:
        # Shallow variants: single hidden layer, Y is the item factor.
        RQ, X, xBias, Y, yBias = models[args.model](
            train,
            valid,
            dataset=args.dataset,
            matrix_unif_train=unif_train,
            iteration=args.iter,
            rank=args.rank,
            gpu_on=args.gpu,
            lam=args.lamb,
            lam2=args.lamb2,
            seed=args.seed,
            batch_size=args.batch_size,
            way=args.way,
            confidence=args.confidence,
            step=args.step)

        _save_factors(save_path, args.way, args.model, args.rank,
                      factors=[('U', RQ), ('Y', Y), ('X', X)],
                      biases=[('xB', xBias), ('yB', yBias)])

        _predict_and_report(progress, RQ, Y.T, yBias, valid, args.gpu)
def main(args):
    """Active-learning experiment driver.

    User rows are split at ``train_index``: rows before it form the warm
    cohort used to fit the initial model; rows after it are the cohort whose
    feedback is iteratively queried from the active matrix, folded back into
    the train matrix, and finally evaluated against the test matrix at
    several top-k cutoffs.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Active Learning Algorithm: {}".format(args.active_model))
    print("Recommendation Algorithm: {}".format(args.rec_model))
    print("GPU: {}".format(args.gpu))
    print("Iterative: {}".format(args.iterative))
    print("Sample From All: {}".format(args.sample_from_all))
    print("Train Valid Test Split Ratio: {}".format(args.ratio))
    print("Learning Rate: {}".format(args.learning_rate))
    print("Rank: {}".format(args.rank))
    print("Lambda: {}".format(args.lamb))
    print("Epoch: {}".format(args.epoch))
    print("Active Learning Iteration: {}".format(args.active_iteration))
    print("Evaluation Ranking Topk: {}".format(args.topk))
    print("UCB Confidence: {}".format(args.confidence_interval))
    print("Number of Item per Active Iteration: {}".format(args.num_item_per_iter))
    print("UCB Number of Latent Sampling: {}".format(args.num_latent_sampling))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    R_train = load_numpy(path=args.path, name=args.train)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_active = load_numpy(path=args.path, name=args.active)
    print("Active U-I Dimensions: {}".format(R_active.shape))

    R_test = load_numpy(path=args.path, name=args.test)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    # First ratio[0] fraction of user rows is the warm training cohort; the
    # remaining rows take part in the active-learning loop below.
    train_index = int(R_test.shape[0]*args.ratio[0])

    progress.section("Preparing Data")
    # Filter out users with too little data to support the experiment:
    # 2*num_item_per_iter*active_iteration candidate items for querying and
    # 2*topk items for evaluation.  NOTE(review): exact filtering semantics
    # live in filter_users — confirm thresholds are per-user counts.
    matrix_train, matrix_active, matrix_test, _ = filter_users(R_train,
                                                               R_active,
                                                               R_test,
                                                               train_index=train_index,
                                                               active_threshold=2*args.num_item_per_iter*args.active_iteration,
                                                               test_threshold=2*args.topk)

    m, n = matrix_train.shape

    # Items already queried per user across iterations; passed through
    # update_matrix so the same item is not asked twice.
    history_items = np.array([])

    model = rec_models[args.rec_model](observation_dim=n, latent_dim=args.rank,
                                       batch_size=128, lamb=args.lamb,
                                       learning_rate=args.learning_rate,
                                       optimizer=Regularizer[args.optimizer])

    progress.section("Training")
    # Warm-start the model on the head of the matrix only.
    model.train_model(matrix_train[:train_index], args.corruption, args.epoch)

    for i in range(args.active_iteration):
        print('This is step {} \n'.format(i))
        print('The number of ones in train set is {}'.format(len(matrix_train[train_index:].nonzero()[0])))
        print('The number of ones in active set is {}'.format(len(matrix_active[train_index:].nonzero()[0])))

        progress.section("Predicting")
        # .A densifies the sparse slice for the active-learning scorer.
        observation = active_models[args.active_model](model=model, matrix=matrix_train[train_index:].A, ci=args.confidence_interval, num_latent_sampling=args.num_latent_sampling)

        progress.section("Update Train Set")
        # Fold the queried feedback from the active matrix into the train
        # matrix and extend the per-user query history.
        matrix_train, history_items = update_matrix(history_items, matrix_train,
                                                    matrix_active, observation,
                                                    train_index, args.iterative,
                                                    args.sample_from_all,
                                                    args.num_item_per_iter,
                                                    args.active_iteration, args.gpu)

        # Non-iterative mode performs a single bulk query round.
        if not args.iterative:
            break

#    matrix_train = matrix_train + matrix_active
    print('The number of ones in train set is {}'.format(len(matrix_train[train_index:].nonzero()[0])))

    progress.section("Re-Training")
    # Retrain on everything gathered so far (warm users + queried feedback).
    model.train_model(matrix_train, args.corruption, args.epoch)

    progress.section("Re-Predicting")
    observation = active_models['Greedy'](model=model, matrix=matrix_train.A)

    result = {}
    for topk in [5, 10, 15, 20, 50]:
        predict_items, _ = sampling_predict(prediction_scores=observation[train_index:],
                                            topK=topk,
                                            matrix_train=matrix_train[train_index:],
                                            matrix_active=matrix_active[train_index:],
                                            sample_from_all=True,
                                            iterative=False,
                                            history_items=np.array([]),
                                            gpu=args.gpu)

        progress.section("Create Metrics")
        # NOTE(review): `eval` shadows the Python builtin here; presumably a
        # project metric helper imported elsewhere in this file — confirm.
        result.update(eval(matrix_test[train_index:], topk, predict_items))

    print(result)

    # Release the TF session/graph so repeated runs start clean.
    model.sess.close()
    tf.reset_default_graph()