예제 #1
0
def sensitivity(train, validation, params):
    """Sensitivity study of NCE-PLRec over the `root` hyper-parameter.

    Trains once at the default root (1.0), then once for every value in
    params['root'], evaluating each on the validation matrix.

    Returns:
        (default_result, sensitivity_results) where sensitivity_results maps
        each root value to its evaluation dict.
    """
    progress = WorkSplitter()

    def _fit_and_score(root_value):
        # Train NCE-PLRec at the given root, rank with predict(), evaluate.
        RQ, Yt, _ = params['models']['NCE-PLRec'](train,
                                                  embeded_matrix=np.empty((0)),
                                                  iteration=params['iter'],
                                                  rank=params['rank'],
                                                  lam=params['lambda'],
                                                  root=root_value)
        scores = predict(matrix_U=RQ,
                         matrix_V=Yt.T,
                         topK=params['topK'][-1],
                         matrix_Train=train,
                         gpu=True)
        return evaluate(scores, validation, params['metric'], params['topK'])

    def _report(result):
        # Shared tail of the per-run console report.
        print("Rank: {0}".format(params['rank']))
        print("Lambda: {0}".format(params['lambda']))
        print("SVD Iteration: {0}".format(params['iter']))
        print("Evaluation Ranking Topk: {0}".format(params['topK']))
        for key in result.keys():
            print("{0} :{1}".format(key, result[key]))

    progress.section("PMI-PLRec Default")
    default_result = _fit_and_score(1.0)
    print("-")
    _report(default_result)

    sensitivity_results = dict()
    for root in tqdm(params['root']):
        progress.section("PMI-PLRec, Root: " + str(root))
        result = _fit_and_score(root)
        sensitivity_results[root] = result
        print("-")
        print("Root: {0}".format(root))
        _report(result)

    return default_result, sensitivity_results
    def train_model(self, matrix_train, matrix_valid, epoch, metric_names):
        """Mini-batch training loop; keeps the epoch with the best valid AUC.

        Args:
            matrix_train: sparse user-item training matrix.
            matrix_valid: validation matrix forwarded to predict/evaluate.
            epoch: number of full passes over the observed pairs.
            metric_names: metric names for evaluate(); must include 'AUC'.

        Returns:
            (best_RQ, best_Y, best_uBias, best_iBias) — user embeddings, item
            embeddings, user biases, item biases from the best epoch.
        """
        user_item_matrix = lil_matrix(matrix_train)
        # (user, item) index pairs of every observed interaction, shape (nnz, 2).
        user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

        # Training
        best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
        for i in tqdm(range(epoch)):
            batches = self.get_batches(user_item_pairs, matrix_train, self.batch_size)
            for step in range(len(batches)):
                # batches[step] = (user indices, item indices, labels);
                # reg_idx presumably maps items for "2i" regularization —
                # TODO confirm against compute_2i_regularization_id.
                reg_idx = self.compute_2i_regularization_id(batches[step][1], self.num_items)
                feed_dict = {self.user_idx: batches[step][0],
                             self.item_idx: batches[step][1],
                             self.label: batches[step][2],
                             self.reg_idx: reg_idx
                             }
                _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

            # Snapshot current parameters; item tables are sliced to the first
            # num_items rows (rows beyond num_items are excluded).
            RQ, Y, uBias, iBias = self.sess.run([self.user_embeddings,
                                                 self.item_embeddings[0:self.num_items, :],
                                                 self.user_bias_embeddings,
                                                 self.item_bias_embeddings[0:self.num_items]])
            prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid, ubias=uBias, ibias=iBias,
                                 gpu=self.gpu_on)
            result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

            # Model selection on validation AUC.
            if result['AUC'][0] > best_AUC:
                best_AUC = result['AUC'][0]
                best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

        return best_RQ, best_Y, best_uBias, best_iBias
예제 #3
0
    def train_model(self, rating_matrix, matrix_valid, invP, epoch, metric_names):
        """Weighted mini-batch training; keeps the epoch with best valid AUC.

        Args:
            rating_matrix: sparse user-item training matrix.
            matrix_valid: validation matrix forwarded to predict/evaluate.
            invP: weighting data passed to get_batches — presumably inverse
                propensity scores; TODO confirm against get_batches.
            epoch: number of full passes over the observed pairs.
            metric_names: metric names for evaluate(); must include 'AUC'.

        Returns:
            (best_RQ, best_Y, best_uBias, best_iBias) from the epoch whose
            validation AUC was highest.
        """
        user_item_matrix = lil_matrix(rating_matrix)
        # (user, item) index pairs of every observed interaction.
        user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

        # Training
        best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
        for i in tqdm(range(epoch)):
            batches = self.get_batches(user_item_pairs, rating_matrix, invP, self.batch_size)
            for step in range(len(batches)):
                # batches[step] = (users, items, labels, per-example weights).
                feed_dict = {self.user_idx: batches[step][0],
                             self.item_idx: batches[step][1],
                             self.label: batches[step][2],
                             self.weight: batches[step][3]
                             }
                _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

            # Snapshot current embeddings and bias vectors.
            RQ, Y, uBias, iBias = self.sess.run(
                [self.user_embeddings, self.item_embeddings,
                 self.user_bias_embeddings, self.item_bias_embeddings])
            prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid, ubias=uBias, ibias=iBias, gpu=self.gpu_on)
            result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

            # Model selection on validation AUC.
            if result['AUC'][0] > best_AUC:
                best_AUC = result['AUC'][0]
                best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

        return best_RQ, best_Y, best_uBias, best_iBias
예제 #4
0
    def train_model(self, matrix_train, matrix_valid, epoch, metric_names):
        """Train while collecting the model's refined (relabelled) targets.

        Args:
            matrix_train: sparse user-item training matrix.
            matrix_valid: validation matrix forwarded to predict/evaluate.
            epoch: number of full passes over the observed pairs.
            metric_names: metric names for evaluate(); must include 'AUC'.

        Returns:
            (best_RQ, best_Y, best_uBias, best_iBias, best_refined_label,
            user_item_pairs, best_prediction). best_refined_label stacks
            (user, item, refined label) rows from the best epoch; note that
            user_item_pairs comes from the input matrix and is not
            epoch-specific.
        """
        user_item_matrix = lil_matrix(matrix_train)
        user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

        # Training
        best_AUC, best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, best_prediction = 0, [], [], [], [], [], []
        for i in tqdm(range(epoch)):
            batches = self.get_batches(user_item_pairs, matrix_train, self.batch_size)
            refined_label = []  # per-batch (user, item, refined label) triples
            for step in range(len(batches)):
                feed_dict = {self.user_idx: batches[step][0],
                             self.item_idx: batches[step][1],
                             self.label: batches[step][2]
                             }
                # One optimizer step; also fetch the labels the model refined
                # for this batch so they can be returned to the caller.
                _, temp_refined_label = self.sess.run([self.optimizer, self.refined_label], feed_dict=feed_dict)
                refined_label.append(np.stack((batches[step][0], batches[step][1], np.asarray(temp_refined_label)), axis=-1))

            # Snapshot the normalized parameter tensors for evaluation.
            RQ, Y, uBias, iBias = self.sess.run(
                [self.norm_user_embeddings, self.norm_item_embeddings,
                 self.norm_user_bias_embeddings, self.norm_item_bias_embeddings])
            prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid, ubias=uBias, ibias=iBias,
                                 gpu=self.gpu_on)
            result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

            # Keep the epoch with the best validation AUC (plus its refined
            # labels and predictions).
            if result['AUC'][0] > best_AUC:
                best_AUC = result['AUC'][0]
                best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, best_prediction = RQ, Y, uBias, iBias, np.vstack(refined_label), prediction

        return best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, user_item_pairs, best_prediction
예제 #5
0
    def train_model(self, rating_matrix, matrix_valid, epoch, metric_names):
        """Train a dense-input model; keep the best-by-valid-AUC parameters.

        Args:
            rating_matrix: sparse user-item training matrix; each batch of
                rows is densified before being fed to the graph.
            matrix_valid: validation matrix forwarded to predict/evaluate.
            epoch: number of training passes.
            metric_names: metric names for evaluate(); must include 'AUC'.

        Returns:
            (best_RQ, best_X, best_xBias, best_Y, best_yBias) from the epoch
            with the highest validation AUC.
        """
        # Training
        best_AUC, best_RQ, best_X, best_xBias, best_Y, best_yBias = 0, [], [], [], [], []
        for i in tqdm(range(epoch)):
            batches, sample_idx = self.get_batches(rating_matrix,
                                                   self.batch_size)
            for step in range(len(batches)):
                feed_dict = {
                    # Sparse row batch is densified for the feed.
                    self.inputs: batches[step].todense(),
                    self.sample_idx: sample_idx[step]
                }
                _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

            # Snapshot factors: RQ/X/xBias come from get_RQ over the full
            # training matrix, Y/yBias from separate getters — presumably the
            # decoder-side item factors; TODO confirm against get_Y/get_yBias.
            RQ, X, xBias = self.get_RQ(rating_matrix)
            Y = self.get_Y()
            yBias = self.get_yBias()
            prediction = predict(matrix_U=RQ,
                                 matrix_V=Y.T,
                                 matrix_Valid=matrix_valid,
                                 bias=yBias,
                                 gpu=self.gpu_on)
            result = evaluate(prediction,
                              matrix_valid,
                              metric_names,
                              gpu=self.gpu_on)

            # Model selection on validation AUC.
            if result['AUC'][0] > best_AUC:
                best_AUC = result['AUC'][0]
                best_RQ, best_X, best_xBias, best_Y, best_yBias = RQ, X, xBias, Y, yBias

        return best_RQ, best_X, best_xBias, best_Y, best_yBias
예제 #6
0
    def train_model(self, rating_matrix, matrix_unif_train, matrix_valid,
                    epoch, metric_names):
        """Train on logged data plus a uniformly-collected set.

        Users and items appearing in the uniform set are split into head/tail
        halves by their activity/popularity in the logged data; the splits are
        handed to get_batches. Keeps the parameters from the epoch with the
        best validation AUC.

        Args:
            rating_matrix: logged (biased) training matrix.
            matrix_unif_train: uniformly-logged training matrix.
            matrix_valid: validation matrix forwarded to predict/evaluate.
            epoch: number of training passes.
            metric_names: metric names for evaluate(); must include 'AUC'.

        Returns:
            (best_RQ, best_Y, best_uBias, best_iBias).
        """
        user_item_matrix = lil_matrix(rating_matrix)
        user_item_pairs = np.asarray(user_item_matrix.nonzero()).T
        unif_user_item_matrix = lil_matrix(matrix_unif_train)
        unif_user_item_pairs = np.asarray(unif_user_item_matrix.nonzero()).T

        # Per-user interaction counts in the logged data.
        user_set, user_activity = np.unique(user_item_pairs[:, 0],
                                            return_counts=True)
        unif_user_set = np.unique(unif_user_item_pairs[:, 0])
        # NOTE(review): indexing user_activity by raw user ids assumes every
        # id 0..max occurs in the logged data (user_set == arange) — confirm.
        unif_user_activity = user_activity[unif_user_set]
        sorted_idx = np.argsort(unif_user_activity)
        # Lower half by activity = tail users, upper half = head users.
        tail_users = unif_user_set[sorted_idx[0:int(len(unif_user_set) / 2)]]
        head_users = unif_user_set[sorted_idx[int(len(unif_user_set) / 2):]]

        # Same head/tail split for items, by logged popularity.
        item_set, item_popularity = np.unique(user_item_pairs[:, 1],
                                              return_counts=True)
        unif_item_set = np.unique(unif_user_item_pairs[:, 1])
        unif_item_popularity = item_popularity[unif_item_set]
        sorted_idx = np.argsort(unif_item_popularity)
        tail_items = unif_item_set[sorted_idx[0:int(len(unif_item_set) / 2)]]
        head_items = unif_item_set[sorted_idx[int(len(unif_item_set) / 2):]]

        # Training
        best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
        for i in tqdm(range(epoch)):
            batches = self.get_batches(user_item_pairs, unif_user_item_pairs,
                                       matrix_unif_train, rating_matrix,
                                       self.batch_size, head_users, tail_users,
                                       head_items, tail_items)
            for step in range(len(batches)):
                feed_dict = {
                    self.user_idx: batches[step][0],
                    self.item_idx: batches[step][1],
                    self.label: batches[step][2]
                }
                _, loss = self.sess.run([self.optimizer, self.loss],
                                        feed_dict=feed_dict)

            # Snapshot current embeddings and bias vectors.
            RQ, Y, uBias, iBias = self.sess.run([
                self.user_embeddings, self.item_embeddings,
                self.user_bias_embeddings, self.item_bias_embeddings
            ])
            prediction = predict(matrix_U=RQ,
                                 matrix_V=Y,
                                 matrix_Valid=matrix_valid,
                                 ubias=uBias,
                                 ibias=iBias,
                                 gpu=self.gpu_on)
            result = evaluate(prediction,
                              matrix_valid,
                              metric_names,
                              gpu=self.gpu_on)

            # Model selection on validation AUC.
            if result['AUC'][0] > best_AUC:
                best_AUC = result['AUC'][0]
                best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

        return best_RQ, best_Y, best_uBias, best_iBias
예제 #7
0
def execute(test, params, folder='latent'):
    """Load saved latent factors for params['model'], score `test`, and
    return a one-row DataFrame of rounded metric values.

    File layout on disk: <folder>/[<way>_]<kind>_<model>_<rank>.npy, where
    kind is U for user factors, K/kB for autoencoder-style item factors and
    bias, Y/yB otherwise.

    Args:
        test: matrix to evaluate against.
        params: dict with 'model', 'rank', 'way', 'metric'.
        folder: directory holding the saved .npy factors.

    Returns:
        pd.DataFrame with columns ['model', 'way'] + metric names.
    """
    # Autoencoder-style models store item factors/bias under K/kB;
    # everything else uses Y/yB. (Was four near-duplicate branches.)
    if params['model'] in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        item_key, bias_key = 'K', 'kB'
    else:
        item_key, bias_key = 'Y', 'yB'

    if params['way'] is not None:
        prefix = '{0}/{1}_'.format(folder, params['way'])
    else:
        prefix = '{0}/'.format(folder)

    def _load(kind):
        # One loader for every factor file variant.
        return np.load('{0}{1}_{2}_{3}.npy'.format(
            prefix, kind, params['model'], params['rank']))

    RQ = _load('U')
    Y = _load(item_key)
    Bias = _load(bias_key)

    prediction = predict(matrix_U=RQ,
                         matrix_V=Y.T,
                         matrix_Valid=test,
                         bias=Bias)

    result = evaluate(prediction, test, params['metric'])

    result_dict = {'model': params['model'], 'way': params['way']}
    for name in result.keys():
        result_dict[name] = round(result[name][0], 8)

    # DataFrame.append was removed in pandas 2.0; build the row via concat.
    df = pd.DataFrame(columns=['model', 'way'])
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
예제 #8
0
def main(args):
    """Script entry point: load the training matrix, fit the selected model,
    produce a top-k ranking, and optionally score it on a validation split."""
    progress = WorkSplitter()

    # Echo the run configuration before doing any work.
    progress.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Train File Name: {}".format(args.train))
    if args.validation:
        print("Valid File Name: {}".format(args.valid))
    print("Algorithm: {}".format(args.model))
    print("Lambda Diversity: {}".format(args.lambda_diversity))
    print("Lambda Serendipity: {}".format(args.lambda_serendipity))
    print("Nearest Neighbor Number: {}".format(args.k))
    print("Evaluation Ranking Topk: {}".format(args.topk))

    progress.section("Loading Data")
    load_start = time.time()
    matrix_train = load_numpy(path=args.path, name=args.train)
    print("Elapsed: {}".format(inhour(time.time() - load_start)))
    print("Train U-I Dimensions: {}".format(matrix_train.shape))

    progress.section("Train")
    recommender = models[args.model]()
    recommender.train(matrix_train)

    progress.section("Predict")
    raw_scores = recommender.predict(
        matrix_train,
        k=args.k,
        lambda_diversity=args.lambda_diversity,
        lambda_serendipity=args.lambda_serendipity)

    topk_items = predict(prediction_score=raw_scores,
                         topK=args.topk,
                         matrix_Train=matrix_train)

    # Without a validation split there is nothing left to do.
    if not args.validation:
        return

    progress.section("Create Metrics")
    eval_start = time.time()

    metric_names = [
        'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
    ]

    matrix_valid = load_numpy(path=args.path, name=args.valid)
    result = evaluate(topk_items, matrix_valid, metric_names, [args.topk])
    print("-")
    for metric in result.keys():
        print("{}:{}".format(metric, result[metric]))
    print("Elapsed: {}".format(inhour(time.time() - eval_start)))
예제 #9
0
def simpleKNNPrediction(similarityMatrix, predictionMatrix, kValue,
                        validOrTestMatrix):
    """Score a user-based KNN model and return its MAP@10.

    (Historical note from the original: kValue was swept over
    range(50, 120, 10).)
    """
    sim = train(similarityMatrix)
    scores = predict(predictionMatrix,
                     kValue,
                     sim,
                     item_similarity_en=False)
    topk = prediction(scores, 50, predictionMatrix)
    metrics = evaluate(topk, validOrTestMatrix)
    return metrics.get('MAP@10')[0]
예제 #10
0
def hyper_parameter_tuning(train, validation, params, save_path):
    """Grid-search k for every model in params['models'], checkpointing the
    results table to CSV after each evaluated setting.

    Rows already present in the saved table are skipped, so an interrupted
    run resumes where it left off.

    Args:
        train: training matrix.
        validation: validation matrix.
        params: dict with 'models', 'k', 'topK', 'metric'.
        save_path: CSV file name (relative to the configured tables path).
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Resume from a previous run when the results table exists.
    # (Was a bare except, which also swallowed KeyboardInterrupt.)
    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # best-effort: any load failure starts a fresh table
        df = pd.DataFrame(columns=['model', 'k', 'topK'])

    for algorithm in params['models']:
        for k in params['k']:
            # Skip settings already evaluated in a previous run.
            if ((df['model'] == algorithm) & (df['k'] == k)).any():
                continue

            progress.section("model: {}, k: {}".format(algorithm, k))

            progress.subsection("Training")
            model = params['models'][algorithm]()
            model.train(train)

            progress.subsection("Prediction")
            prediction_score = model.predict(train, k=k)
            prediction = predict(prediction_score=prediction_score,
                                 topK=params['topK'][-1],
                                 matrix_Train=train)

            progress.subsection("Evaluation")
            result = evaluate(prediction, validation, params['metric'],
                              params['topK'])

            result_dict = {'model': algorithm, 'k': k}
            for name in result.keys():
                # First two entries of each metric, rounded for the table.
                result_dict[name] = [round(result[name][0], 4),
                                     round(result[name][1], 4)]

            # DataFrame.append was removed in pandas 2.0; use concat instead.
            df = pd.concat([df, pd.DataFrame([result_dict])],
                           ignore_index=True)
            save_dataframe_csv(df, table_path, save_path)
예제 #11
0
def execute(train, test, params, model, gpu_on=True, analytical=False):
    """Train `model` with the given hyper-parameters and evaluate on `test`.

    Args:
        train: training matrix.
        test: evaluation matrix.
        params: dict with 'model', 'rank', 'lambda', 'epoch', 'corruption',
            'topK', 'metric'.
        model: training callable returning (RQ, Yt, Bias).
        gpu_on: forwarded to predict(gpu=...).
        analytical: forwarded to evaluate(); when True the raw result dict is
            returned instead of a DataFrame.

    Returns:
        Raw evaluate() result when analytical, else a one-row pd.DataFrame.
    """
    progress = WorkSplitter()

    columns = ['model', 'rank', 'lambda', 'epoch', 'corruption', 'topK']
    progress.section("\n".join(
        [":".join((str(k), str(params[k]))) for k in columns]))

    progress.subsection("Train")
    RQ, Yt, Bias = model(train,
                         epoch=params['epoch'],
                         lamb=params['lambda'],
                         rank=params['rank'],
                         corruption=params['corruption'])
    Y = Yt.T

    progress.subsection("Prediction")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=params['topK'][-1],
                         matrix_Train=train,
                         gpu=gpu_on)

    progress.subsection("Evaluation")
    result = evaluate(prediction,
                      test,
                      params['metric'],
                      params['topK'],
                      analytical=analytical)

    if analytical:
        return result

    # Copy params: assigning result_dict = params aliased the caller's dict
    # and wrote the metric columns back into it.
    result_dict = dict(params)
    for name in result.keys():
        result_dict[name] = [round(result[name][0], 4),
                             round(result[name][1], 4)]

    # DataFrame.append was removed in pandas 2.0; use concat instead.
    df = pd.DataFrame(columns=columns)
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
예제 #12
0
def lookup(train, validation, params, measure='Cosine', gpu_on=True):
    """Evaluate previously-saved latent factors for every model in
    params['models'] and return a DataFrame with one row per model.

    Args:
        train: training matrix (seen items are masked during prediction).
        validation: matrix to evaluate against.
        params: dict with 'models', 'rank', 'topK', 'metric'.
        measure: similarity measure forwarded to predict().
        gpu_on: forwarded to predict(gpu=...).

    Returns:
        pd.DataFrame with columns ['model'] + metric names.
    """
    progress = WorkSplitter()
    df = pd.DataFrame(columns=['model'])

    for algorithm in params['models']:
        RQ = np.load('latent/U_{0}_{1}.npy'.format(algorithm, params['rank']))
        Y = np.load('latent/V_{0}_{1}.npy'.format(algorithm, params['rank']))
        # Bias is optional on disk; fall back to None when absent.
        # (Path string was built three times in the original.)
        bias_path = 'latent/B_{0}_{1}.npy'.format(algorithm, params['rank'])
        Bias = np.load(bias_path) if os.path.isfile(bias_path) else None

        progress.subsection("Prediction")

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             measure=measure,
                             bias=Bias,
                             topK=params['topK'][-1],
                             matrix_Train=train,
                             gpu=gpu_on)

        progress.subsection("Evaluation")

        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {'model': algorithm}
        for name in result.keys():
            result_dict[name] = [round(result[name][0], 4),
                                 round(result[name][1], 4)]

        # DataFrame.append was removed in pandas 2.0; use concat instead.
        df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
예제 #13
0
def execute(train, test, params, model, analytical=False):
    """Train a KNN-style model and evaluate it on `test`.

    Args:
        train: training matrix.
        test: evaluation matrix.
        params: dict with 'model', 'k', 'topK', 'metric'.
        model: model class; instantiated with no arguments.
        analytical: forwarded to evaluate(); when True the raw result dict is
            returned instead of a DataFrame.

    Returns:
        Raw evaluate() result when analytical, else a one-row pd.DataFrame.
    """
    progress = WorkSplitter()

    columns = ['model', 'k', 'topK']
    progress.section("\n".join(
        [":".join((str(key), str(params[key]))) for key in columns]))

    progress.subsection("Train")
    # Bind the instance to a new name so the `model` class argument is not
    # shadowed (the original reassigned the parameter).
    recommender = model()
    recommender.train(train)

    progress.subsection("Prediction")
    prediction_score = recommender.predict(train, k=params['k'])
    prediction = predict(prediction_score=prediction_score,
                         topK=params['topK'][-1],
                         matrix_Train=train)

    progress.subsection("Evaluation")
    result = evaluate(prediction,
                      test,
                      params['metric'],
                      params['topK'],
                      analytical=analytical)

    if analytical:
        return result

    # Copy params: assigning result_dict = params aliased the caller's dict
    # and wrote the metric columns back into it.
    result_dict = dict(params)
    for name in result.keys():
        result_dict[name] = [round(result[name][0], 4),
                             round(result[name][1], 4)]

    # DataFrame.append was removed in pandas 2.0; use concat instead.
    df = pd.DataFrame(columns=columns)
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
예제 #14
0
def execute(test, params, folder='latent'):
    """Load saved MF factors (U, V, user/item biases), score `test`, and
    return a one-row DataFrame of rounded metric values.

    File layout on disk: <folder>/[<way>_]<kind>_<model>_<rank>.npy.

    Args:
        test: matrix to evaluate against.
        params: dict with 'model', 'rank', 'way', 'metric'.
        folder: directory holding the saved .npy factors.

    Returns:
        pd.DataFrame with columns ['model', 'way'] + metric names.
    """
    # Build the shared path prefix once instead of duplicating the format
    # string across eight np.load calls.
    if params['way'] is not None:
        prefix = '{0}/{1}_'.format(folder, params['way'])
    else:
        prefix = '{0}/'.format(folder)

    def _load(kind):
        # One loader for every factor-file variant.
        return np.load('{0}{1}_{2}_{3}.npy'.format(
            prefix, kind, params['model'], params['rank']))

    RQ = _load('U')
    Y = _load('V')
    uBias = _load('uB')
    iBias = _load('iB')

    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         matrix_Valid=test,
                         ubias=uBias,
                         ibias=iBias)

    result = evaluate(prediction, test, params['metric'])

    result_dict = {'model': params['model'], 'way': params['way']}
    for name in result.keys():
        result_dict[name] = round(result[name][0], 8)

    # DataFrame.append was removed in pandas 2.0; build the row via concat.
    df = pd.DataFrame(columns=['model', 'way'])
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
예제 #15
0
def hyper_parameter_tuning(train, validation, params, save_path, gpu_on=True):
    """Grid-search rank/lambda/corruption for every model, checkpointing the
    results table to CSV after each evaluated setting.

    Rows already present in the saved table are skipped, so an interrupted
    run resumes where it left off.

    Args:
        train: training matrix.
        validation: validation matrix.
        params: dict with 'models', 'rank', 'lambda', 'corruption', 'epoch',
            'topK', 'metric'.
        save_path: CSV file name (relative to the configured tables path).
        gpu_on: forwarded to predict(gpu=...).
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Resume from a previous run when the results table exists.
    # (Was a bare except, which also swallowed KeyboardInterrupt.)
    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # best-effort: any load failure starts a fresh table
        df = pd.DataFrame(
            columns=['model', 'rank', 'lambda', 'epoch', 'corruption', 'topK'])

    for algorithm in params['models']:
        for rank in params['rank']:
            for lamb in params['lambda']:
                for corruption in params['corruption']:
                    # Skip settings already evaluated in a previous run.
                    if ((df['model'] == algorithm) & (df['rank'] == rank) &
                        (df['lambda'] == lamb) &
                        (df['corruption'] == corruption)).any():
                        continue

                    # (Original bound this string to `format`, shadowing the
                    # builtin of the same name.)
                    progress.section(
                        "model: {}, rank: {}, lambda: {}, corruption: {}"
                        .format(algorithm, rank, lamb, corruption))
                    RQ, Yt, Bias = params['models'][algorithm](
                        train,
                        epoch=params['epoch'],
                        lamb=lamb,
                        rank=rank,
                        corruption=corruption)
                    Y = Yt.T

                    progress.subsection("Prediction")

                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y,
                                         bias=Bias,
                                         topK=params['topK'][-1],
                                         matrix_Train=train,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")

                    result = evaluate(prediction, validation, params['metric'],
                                      params['topK'])

                    result_dict = {
                        'model': algorithm,
                        'rank': rank,
                        'lambda': lamb,
                        'epoch': params['epoch'],
                        'corruption': corruption
                    }
                    for name in result.keys():
                        result_dict[name] = [round(result[name][0], 4),
                                             round(result[name][1], 4)]

                    # DataFrame.append was removed in pandas 2.0; use concat.
                    df = pd.concat([df, pd.DataFrame([result_dict])],
                                   ignore_index=True)
                    save_dataframe_csv(df, table_path, save_path)
예제 #16
0
def computeUUCombination(rtrain,
                         rtrain_userAvg,
                         userVisitMatrix,
                         rtrain_implicit,
                         combinationDict,
                         SimilarityMatrixIndex,
                         kTune,
                         method='max'):
    """Run user-user KNN over every similarity-matrix combination on four
    variants of the training data, collect MAP@10 per combination, and plot.

    Args:
        rtrain, rtrain_userAvg, userVisitMatrix, rtrain_implicit: four
            preprocessed variants of the training matrix.
        combinationDict: maps a combination label to the list of
            similarity-matrix indices (1..4) that should use their primary
            variant.
        SimilarityMatrixIndex: maps index (1..4) to a pair of similarity
            matrices; element [0] is used when the index is in the
            combination, element [1] otherwise.
        kTune: neighborhood size forwarded to predictUU.
        method: weighting scheme forwarded as chooseWeigthMethod.

    NOTE(review): rvalid, rvalid_userAvg and rvalid_implicit are not defined
    in this function — they must exist as globals at call time. Also,
    similarityOne..similarityFour are only bound for indices present in
    SimilarityMatrixIndex; with fewer than four entries predictUU would raise
    NameError. (Original docstring: "Declaration for kRange =
    range(50,120,10)".)
    """

    # combination label -> MAP@10, one dict per data variant.
    prediction1 = {}
    prediction2 = {}
    prediction3 = {}
    prediction4 = {}

    for combination, indexList in combinationDict.items():
        #Loop through the similarity matrices
        # Pick variant [0] for indices in this combination, [1] otherwise.
        for index in SimilarityMatrixIndex.keys():
            if index in indexList:
                if index == 1:
                    similarityOne = SimilarityMatrixIndex[1][0]
                elif index == 2:
                    similarityTwo = SimilarityMatrixIndex[2][0]
                elif index == 3:
                    similarityThree = SimilarityMatrixIndex[3][0]
                elif index == 4:
                    similarityFour = SimilarityMatrixIndex[4][0]
            else:
                if index == 1:
                    similarityOne = SimilarityMatrixIndex[1][1]
                elif index == 2:
                    similarityTwo = SimilarityMatrixIndex[2][1]
                elif index == 3:
                    similarityThree = SimilarityMatrixIndex[3][1]
                elif index == 4:
                    similarityFour = SimilarityMatrixIndex[4][1]

        # Variant 1: raw training matrix, validated against rvalid (global).
        user_item_prediction_score1 = predictUU(rtrain,
                                                kTune,
                                                similarityOne,
                                                similarityTwo,
                                                similarityThree,
                                                similarityFour,
                                                chooseWeigthMethod=method,
                                                item_similarity_en=False)
        user_item_predict1 = prediction(user_item_prediction_score1, 50,
                                        rtrain)
        user_item_res1 = evaluate(user_item_predict1, rvalid)
        prediction1[combination] = user_item_res1.get('MAP@10')[0]

        # Variant 2: user-average-adjusted matrix vs rvalid_userAvg (global).
        user_item_prediction_score2 = predictUU(rtrain_userAvg,
                                                kTune,
                                                similarityOne,
                                                similarityTwo,
                                                similarityThree,
                                                similarityFour,
                                                chooseWeigthMethod=method,
                                                item_similarity_en=False)
        user_item_predict2 = prediction(user_item_prediction_score2, 50,
                                        rtrain_userAvg)
        user_item_res2 = evaluate(user_item_predict2, rvalid_userAvg)
        prediction2[combination] = user_item_res2.get('MAP@10')[0]

        # Variant 3: visit matrix vs rvalid_implicit (global).
        user_item_prediction_score3 = predictUU(userVisitMatrix,
                                                kTune,
                                                similarityOne,
                                                similarityTwo,
                                                similarityThree,
                                                similarityFour,
                                                chooseWeigthMethod=method,
                                                item_similarity_en=False)
        user_item_predict3 = prediction(user_item_prediction_score3, 50,
                                        userVisitMatrix)
        user_item_res3 = evaluate(user_item_predict3, rvalid_implicit)
        prediction3[combination] = user_item_res3.get('MAP@10')[0]

        # Variant 4: implicit matrix, also vs rvalid_implicit (global).
        user_item_prediction_score4 = predictUU(rtrain_implicit,
                                                kTune,
                                                similarityOne,
                                                similarityTwo,
                                                similarityThree,
                                                similarityFour,
                                                chooseWeigthMethod=method,
                                                item_similarity_en=False)
        user_item_predict4 = prediction(user_item_prediction_score4, 50,
                                        rtrain_implicit)
        user_item_res4 = evaluate(user_item_predict4, rvalid_implicit)
        prediction4[combination] = user_item_res4.get('MAP@10')[0]

    # Plot the per-variant MAP@10 curves across combinations.
    plotingCombination(prediction1, prediction2, prediction3, prediction4,
                       kTune, method)
예제 #17
0
def hyper_parameter_tuning(train, validation, params, unif_train, save_path, seed, way, dataset, gpu_on):
    """Grid-search hyper-parameters per algorithm family and log validation
    metrics to a CSV table.

    Each algorithm in ``params['models']`` is swept over the grid relevant to
    its family (batch_size x lambda, lambda only, lambda x lambda2,
    confidence, or step).  After every configuration the metrics row is
    appended and the table re-saved via ``save_dataframe_csv``, so an
    interrupted sweep keeps all completed rows.
    """
    progress = WorkSplitter()

    table_path = 'tables/'
    data_name = save_path.split('/')[0]
    save_dir = 'tables/' + data_name + '/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    def _train(algorithm, **overrides):
        # One training run; grid values override the shared defaults below.
        kwargs = dict(matrix_unif_train=unif_train,
                      iteration=params['iter'],
                      rank=params['rank'],
                      gpu_on=gpu_on,
                      lam=params['lambda'],
                      seed=seed,
                      batch_size=params['batch_size'],
                      way=way,
                      dataset=dataset)
        kwargs.update(overrides)
        return params['models'][algorithm](train, validation, **kwargs)

    def _score(RQ, Y, uBias, iBias):
        # Predict on the validation matrix and evaluate params['metric'].
        progress.subsection("Prediction")
        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=validation,
                             ubias=uBias, ibias=iBias, gpu=gpu_on)
        progress.subsection("Evaluation")
        return evaluate(prediction, validation, params['metric'], gpu=gpu_on)

    def _record(df, result, result_dict):
        # Round metrics into the row, append it, and persist immediately.
        # pd.concat replaces DataFrame.append (removed in pandas 2.0).
        for name in result.keys():
            result_dict[name] = round(result[name][0], 8)
        df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
        save_dataframe_csv(df, table_path, save_path)
        return df

    for algorithm in params['models']:
        if algorithm in ['BiasedMF', 'PropensityMF']:
            df = pd.DataFrame(columns=['model', 'batch_size', 'lambda', 'iter'])
            for batch_size in params['batch_size']:
                for lam in params['lambda']:
                    progress.section("model: {0}, batch_size: {1}, lambda: {2}"
                                     .format(algorithm, batch_size, lam))
                    result = _score(*_train(algorithm, lam=lam,
                                            batch_size=batch_size))
                    df = _record(df, result,
                                 {'model': algorithm, 'batch_size': batch_size,
                                  'lambda': lam, 'iter': params['iter']})
        elif algorithm in ['InitFeatureEmbedMF', 'AlterFeatureEmbedMF',
                           'WRSampleMF']:
            df = pd.DataFrame(columns=['model', 'lambda', 'iter'])
            for lam in params['lambda']:
                progress.section("model: {0}, lambda: {1}".format(algorithm,
                                                                  lam))
                result = _score(*_train(algorithm, lam=lam))
                df = _record(df, result, {'model': algorithm, 'lambda': lam,
                                          'iter': params['iter']})
        elif algorithm in ['CausalSampleMF', 'BridgeLabelMF']:
            df = pd.DataFrame(columns=['model', 'lambda', 'lambda2', 'iter'])
            for lam in params['lambda']:
                for lam2 in params['lambda2']:
                    progress.section("model: {0}, lambda: {1}, lambda2: {2}"
                                     .format(algorithm, lam, lam2))
                    result = _score(*_train(algorithm, lam=lam, lam2=lam2))
                    df = _record(df, result,
                                 {'model': algorithm, 'lambda': lam,
                                  'lambda2': lam2, 'iter': params['iter']})
        elif algorithm in ['UnionSampleMF', 'RefineLabelMF']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                progress.section("model: {0}, confidence: {1}"
                                 .format(algorithm, conf))
                result = _score(*_train(algorithm, confidence=conf))
                df = _record(df, result, {'model': algorithm,
                                          'confidence': conf,
                                          'iter': params['iter']})
        elif algorithm in ['BatchSampleMF']:
            df = pd.DataFrame(columns=['model', 'step', 'iter'])
            for step in params['step']:
                progress.section("model: {0}, step: {1}".format(algorithm,
                                                                step))
                result = _score(*_train(algorithm, step=step))
                df = _record(df, result, {'model': algorithm, 'step': step,
                                          'iter': params['iter']})
예제 #18
0
def main(args):
    """Train one recommendation model, persist its latent factors under
    ``latent/``, and optionally report ranking metrics on a validation set.

    Side effects: writes ``latent/U_*``, ``latent/V_*`` (and ``latent/B_*``
    when the model produced a bias term) and prints progress to stdout.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.train))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid))
    print("Algorithm: {0}".format(args.model))
    mode = "Item-based" if args.item else "User-based"
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iter))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.path, name=args.train)
    else:
        R_train = load_csv(path=args.path, name=args.train, shape=args.shape)
    # print() call (was a Python 2 print statement) so the block runs on
    # both Python 2 and 3; output is unchanged.
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: user mode trains on the transposed matrix and
    # transposes the resulting factors back afterwards.
    if args.item:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    if Bias is not None:
        np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=args.topk,
                         matrix_Train=R_train,
                         measure=args.sim_measure,
                         gpu=True)
    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()

        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def usercategory(Rtrain,
                 Rvalid,
                 df_input,
                 topK,
                 metric,
                 problem,
                 model_folder,
                 gpu_on=True):
    """Compare models' per-user metrics grouped by user activity level.

    For each row of ``df_input`` (one trained model), loads the saved latent
    factors from ``model_folder``, scores every user analytically, and
    bar-plots each evaluated metric against the user's training-interaction
    count group under ``analysis/<problem>/numofrating``.
    """
    # Per-user training interaction counts, keeping only users with at least
    # one validation interaction (presumably matching the rows evaluate()
    # returns in analytical mode -- confirm against evaluate()).
    user_observation_counts = np.array(np.sum(Rtrain, axis=1)).flatten()
    user_observation_counts = user_observation_counts[
        np.array(np.sum(Rvalid, axis=1)).flatten() != 0]

    evaluated_metrics = None
    giant_dataframes = []

    for idx, row in df_input.iterrows():
        row = row.to_dict()

        RQ = np.load('{2}/U_{0}_{1}.npy'.format(row['model'], row['rank'],
                                                model_folder))
        Y = np.load('{2}/V_{0}_{1}.npy'.format(row['model'], row['rank'],
                                               model_folder))

        # Bias term is optional: only some models save a B_* file.
        bias_file = '{2}/B_{0}_{1}.npy'.format(row['model'], row['rank'],
                                               model_folder)
        Bias = np.load(bias_file) if os.path.isfile(bias_file) else None

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             bias=Bias,
                             topK=topK[-1],
                             matrix_Train=Rtrain,
                             measure=row['similarity'],
                             gpu=gpu_on)

        result = evaluate(prediction, Rvalid, metric, topK, analytical=True)

        df = pd.DataFrame(result)
        df['model'] = row['model']
        df['user_count'] = user_observation_counts

        giant_dataframes.append(df)

        if evaluated_metrics is None:
            # Freeze the metric names once (list, not a live dict view);
            # all models share the same metric set.
            evaluated_metrics = list(result.keys())

    giant_df = pd.concat(giant_dataframes)
    giant_df['group'] = getGroup(giant_df['user_count'].values)

    giant_df = giant_df.sort_values('group',
                                    ascending=True).reset_index(drop=True)

    # Renamed loop variable so it does not shadow the `metric` parameter.
    for metric_name in evaluated_metrics:
        pandas_bar_plot(x='group',
                        y=metric_name,
                        hue='model',
                        x_name='User Category',
                        y_name=metric_name,
                        df=giant_df,
                        folder='analysis/{0}/numofrating'.format(problem),
                        name=metric_name)
예제 #20
0
def main(args):
    """Train a debiasing model on biased + uniform data, save its latent
    factors under ``latent/<dataset>/``, and report NLL/AUC on validation.

    The deep models ('DeepAutoRec', 'HintAE', 'SoftLabelAE') return nine
    factor/bias arrays and are scored with ``K.T``; all other models return
    five and are scored with ``Y.T``.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset +
                                                args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()

    train = load_numpy(path=args.path, name=args.dataset + args.train)

    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(train.shape))

    valid = load_numpy(path=args.path, name=args.dataset + args.valid)

    unif_train = load_numpy(path=args.path,
                            name=args.dataset + args.unif_train)

    save_dir = 'latent/' + args.dataset
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # File-name prefix: 'latent/<ds>/U_...' or 'latent/<ds>/<way>_U_...'
    if args.way is None:
        prefix = save_dir + '/'
    else:
        prefix = save_dir + '/' + args.way + '_'

    def _save(tag, matrix):
        # One latent matrix per file, tagged with model name and rank.
        np.save(prefix + '{0}_{1}_{2}'.format(tag, args.model, args.rank),
                matrix)

    def _report(RQ, V, bias):
        # Score the validation matrix and print NLL/AUC.
        progress.section("Predict")
        prediction = predict(matrix_U=RQ,
                             matrix_V=V,
                             matrix_Valid=valid,
                             bias=bias,
                             gpu=args.gpu)

        progress.section("Evaluation")
        eval_start = time.time()
        metric_names = ['NLL', 'AUC']
        result = evaluate(prediction, valid, metric_names, gpu=args.gpu)

        print("----Final Result----")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - eval_start)))

    # Train Model
    if args.model in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = models[args.model](
            train,
            valid,
            dataset=args.dataset,
            matrix_unif_train=unif_train,
            iteration=args.iter,
            rank=args.rank,
            rank2=args.rank2,
            gpu_on=args.gpu,
            lam=args.lamb,
            seed=args.seed,
            batch_size=args.batch_size,
            way=args.way,
            confidence=args.confidence,
            step=args.step,
            tau=args.tau)

        for tag, matrix in (('U', RQ), ('Y', Y), ('X', X), ('Z', Z),
                            ('K', K)):
            _save(tag, matrix)
        if xBias is not None:
            for tag, matrix in (('xB', xBias), ('yB', yBias),
                                ('zB', zBias), ('kB', kBias)):
                _save(tag, matrix)

        _report(RQ, K.T, yBias)
    else:
        RQ, X, xBias, Y, yBias = models[args.model](
            train,
            valid,
            dataset=args.dataset,
            matrix_unif_train=unif_train,
            iteration=args.iter,
            rank=args.rank,
            gpu_on=args.gpu,
            lam=args.lamb,
            lam2=args.lamb2,
            seed=args.seed,
            batch_size=args.batch_size,
            way=args.way,
            confidence=args.confidence,
            step=args.step)

        for tag, matrix in (('U', RQ), ('Y', Y), ('X', X)):
            _save(tag, matrix)
        if xBias is not None:
            for tag, matrix in (('xB', xBias), ('yB', yBias)):
                _save(tag, matrix)

        _report(RQ, Y.T, yBias)
예제 #21
0
def hyper_parameter_tuning(train, validation, params, save_path, measure='Cosine', gpu_on=True):
    """Exhaustive grid search over twelve hyper-parameters, logging each
    configuration's validation metrics to a CSV table.

    Resumable: configurations already present in the existing results table
    are skipped, and the table is re-saved after every new configuration so
    an interrupted run loses at most one cell.
    """
    from itertools import product

    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # no existing results table yet -- start a fresh one
        df = pd.DataFrame(columns=['model', 'similarity', 'alpha', 'batch_size',
                                   'corruption', 'epoch', 'iteration', 'key_dimension',
                                   'lambda', 'learning_rate', 'mode_dimension',
                                   'normalize', 'rank', 'root', 'topK'])

    # itertools.product flattens what was a 12-deep nested loop; iteration
    # order over the grid is identical to the nested version.
    grid = product(params['models'], params['alpha'], params['batch_size'],
                   params['corruption'], params['epoch'], params['iteration'],
                   params['key_dimension'], params['lambda'],
                   params['learning_rate'], params['mode_dimension'],
                   params['rank'], params['root'])

    for (algorithm, alpha, batch_size, corruption, epoch, iteration,
         key_dim, lamb, learning_rate, mode_dim, rank, root) in grid:

        # Skip configurations already recorded in the results table.
        if ((df['model'] == algorithm) &
            (df['alpha'] == alpha) &
            (df['batch_size'] == batch_size) &
            (df['corruption'] == corruption) &
            (df['epoch'] == epoch) &
            (df['iteration'] == iteration) &
            (df['key_dimension'] == key_dim) &
            (df['lambda'] == lamb) &
            (df['learning_rate'] == learning_rate) &
            (df['mode_dimension'] == mode_dim) &
            (df['rank'] == rank) &
            (df['root'] == root)).any():
            continue

        # Single-space label (the original line continuation accidentally
        # embedded a run of indentation spaces in the logged string).
        fmt = ("model: {}, alpha: {}, batch_size: {}, corruption: {}, "
               "epoch: {}, iteration: {}, key_dimension: {}, lambda: {}, "
               "learning_rate: {}, mode_dimension: {}, rank: {}, root: {}")
        progress.section(fmt.format(algorithm, alpha, batch_size, corruption,
                                    epoch, iteration, key_dim, lamb,
                                    learning_rate, mode_dim, rank, root))

        RQ, Yt, Bias = params['models'][algorithm](train,
                                                   embedded_matrix=np.empty((0)),
                                                   mode_dim=mode_dim,
                                                   key_dim=key_dim,
                                                   batch_size=batch_size,
                                                   learning_rate=learning_rate,
                                                   iteration=iteration,
                                                   epoch=epoch,
                                                   rank=rank,
                                                   corruption=corruption,
                                                   gpu_on=gpu_on,
                                                   lamb=lamb,
                                                   alpha=alpha,
                                                   root=root)
        Y = Yt.T

        progress.subsection("Prediction")

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             bias=Bias,
                             topK=params['topK'][-1],
                             matrix_Train=train,
                             measure=measure,
                             gpu=gpu_on)

        progress.subsection("Evaluation")

        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {'model': algorithm,
                       'alpha': alpha,
                       'batch_size': batch_size,
                       'corruption': corruption,
                       'epoch': epoch,
                       'iteration': iteration,
                       'key_dimension': key_dim,
                       'lambda': lamb,
                       'learning_rate': learning_rate,
                       'mode_dimension': mode_dim,
                       'rank': rank,
                       'similarity': params['similarity'],
                       'root': root}

        # Each metric cell stores [mean, confidence-interval half-width].
        for name in result.keys():
            result_dict[name] = [round(result[name][0], 4),
                                 round(result[name][1], 4)]

        # pd.concat replaces DataFrame.append (removed in pandas 2.0);
        # save after every configuration so progress survives a crash.
        df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)

        save_dataframe_csv(df, table_path, save_path)
예제 #22
0
def converge(Rtrain, Rtest, df, table_path, file_name, epochs=10, gpu_on=True):
    """Train each autoencoder configuration in `df` and log ranking metrics
    at fixed epoch checkpoints to study convergence.

    Args:
        Rtrain: user-item training matrix of shape (m, n).
        Rtest: user-item matrix used for evaluation.
        df: DataFrame of hyper-parameter rows; rows whose 'model' is not in
            the `autoencoders` registry are skipped.
        table_path: directory where the result CSV is written.
        file_name: name of the result CSV.
        epochs: total training epochs. NOTE: checkpoints occur every 50
            epochs, so the default epochs=10 performs no training at all --
            pass a multiple of 50 (e.g. 200).
        gpu_on: run prediction on GPU when True.

    Returns:
        DataFrame with one metric row per (run, configuration, checkpoint).
    """
    progress = WorkSplitter()
    m, n = Rtrain.shape

    valid_models = autoencoders.keys()

    results = pd.DataFrame(
        columns=['model', 'rank', 'lambda', 'epoch', 'optimizer'])

    # Repeat every configuration three times to smooth out training noise.
    for run in range(3):

        for idx, row in df.iterrows():
            row = row.to_dict()

            if row['model'] not in valid_models:
                continue

            progress.section(json.dumps(row))

            row['metric'] = ['NDCG', 'R-Precision']
            row['topK'] = [50]
            if 'optimizer' not in row.keys():
                row['optimizer'] = 'RMSProp'

            # Some model constructors take (n, rank, ...) while others take
            # (m, n, rank, ...). Catch Exception -- not a bare except -- so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                model = autoencoders[row['model']](
                    n,
                    row['rank'],
                    batch_size=100,
                    lamb=row['lambda'],
                    optimizer=Regularizer[row['optimizer']])

            except Exception:
                model = autoencoders[row['model']](
                    m,
                    n,
                    row['rank'],
                    batch_size=100,
                    lamb=row['lambda'],
                    optimizer=Regularizer[row['optimizer']])

            batches = model.get_batches(Rtrain, 100)

            # Evaluate every `epoch_batch` epochs.
            epoch_batch = 50

            for i in range(epochs // epoch_batch):

                model.train_model(Rtrain,
                                  corruption=row['corruption'],
                                  epoch=epoch_batch,
                                  batches=batches)

                RQ = model.get_RQ(Rtrain)
                Y = model.get_Y()
                Bias = model.get_Bias()

                Y = Y.T

                prediction = predict(matrix_U=RQ,
                                     matrix_V=Y,
                                     bias=Bias,
                                     topK=row['topK'][0],
                                     matrix_Train=Rtrain,
                                     measure='Cosine',
                                     gpu=gpu_on)

                result = evaluate(prediction, Rtest, row['metric'],
                                  row['topK'])
                # NOTE: original author's marker -- "not finished yet".
                result_dict = {
                    'model': row['model'],
                    'rank': row['rank'],
                    'lambda': row['lambda'],
                    'optimizer': row['optimizer'],
                    'epoch': (i + 1) * epoch_batch
                }

                # Keep only the first element of each metric (presumably the
                # point estimate -- TODO confirm against evaluate()).
                for name in result.keys():
                    result_dict[name] = round(result[name][0], 4)
                results = results.append(result_dict, ignore_index=True)

            # Release the TF session and graph before building the next model.
            model.sess.close()
            tf.reset_default_graph()

            # Persist after every configuration so interrupted runs keep data.
            save_dataframe_csv(results, table_path, file_name)

    return results
예제 #23
0
def hyper_parameter_tuning(train,
                           validation,
                           params,
                           save_path,
                           measure='Cosine',
                           gpu_on=True):
    """Grid-search rank/alpha/lambda/corruption/root for every model in
    `params`, evaluating on `validation` and appending each result to a CSV.

    Combinations already present in the existing CSV are skipped, so an
    interrupted search can be resumed by re-running with the same save_path.

    Args:
        train: user-item training matrix.
        validation: user-item validation matrix.
        params: dict with keys 'models', 'rank', 'alpha', 'lambda',
            'corruption', 'root', 'iter', 'similarity', 'metric', 'topK'.
        save_path: CSV path relative to the configured tables directory.
        measure: similarity measure forwarded to `predict`.
        gpu_on: run training/prediction on GPU when True.
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Resume from a previous run when the results CSV already exists;
    # catch Exception rather than a bare except so interrupts propagate.
    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(columns=[
            'model', 'rank', 'alpha', 'lambda', 'iter', 'similarity',
            'corruption', 'root', 'topK'
        ])

    for algorithm in params['models']:

        for rank in params['rank']:

            for alpha in params['alpha']:

                for lam in params['lambda']:

                    for corruption in params['corruption']:

                        for root in params['root']:

                            # Skip combinations already recorded in the CSV.
                            if ((df['model'] == algorithm) &
                                (df['rank'] == rank) & (df['alpha'] == alpha) &
                                (df['lambda'] == lam) &
                                (df['corruption'] == corruption) &
                                (df['root'] == root)).any():
                                continue

                            # 'template' (not 'format') avoids shadowing the
                            # builtin format().
                            template = "model: {0}, rank: {1}, alpha: {2}, lambda: {3}, corruption: {4}, root: {5}"
                            progress.section(
                                template.format(algorithm, rank, alpha, lam,
                                                corruption, root))
                            RQ, Yt, Bias = params['models'][algorithm](
                                train,
                                embeded_matrix=np.empty((0)),
                                iteration=params['iter'],
                                rank=rank,
                                lam=lam,
                                alpha=alpha,
                                corruption=corruption,
                                root=root,
                                gpu_on=gpu_on)
                            Y = Yt.T

                            progress.subsection("Prediction")

                            prediction = predict(matrix_U=RQ,
                                                 matrix_V=Y,
                                                 measure=measure,
                                                 bias=Bias,
                                                 topK=params['topK'][-1],
                                                 matrix_Train=train,
                                                 gpu=gpu_on)

                            progress.subsection("Evaluation")

                            result = evaluate(prediction, validation,
                                              params['metric'], params['topK'])

                            result_dict = {
                                'model': algorithm,
                                'rank': rank,
                                'alpha': alpha,
                                'lambda': lam,
                                'iter': params['iter'],
                                'similarity': params['similarity'],
                                'corruption': corruption,
                                'root': root
                            }

                            # Each metric is stored as a rounded pair
                            # [result[name][0], result[name][1]].
                            for name in result.keys():
                                result_dict[name] = [
                                    round(result[name][0], 4),
                                    round(result[name][1], 4)
                                ]

                            df = df.append(result_dict, ignore_index=True)

                            # Persist after every combination for resumability.
                            save_dataframe_csv(df, table_path, save_path)
예제 #24
0
def general(train,
            test,
            params,
            model,
            measure='Cosine',
            gpu_on=True,
            analytical=False,
            model_folder='latent'):
    """Train (or load cached) latent factors for one model configuration,
    predict, and evaluate on `test`.

    Args:
        train: user-item training matrix.
        test: user-item matrix used for evaluation.
        params: dict of hyper-parameters (see `columns` below for keys).
        model: callable producing (RQ, Yt, Bias) when no cached factors exist.
        measure: similarity measure forwarded to `predict`.
        gpu_on: run training/prediction on GPU when True.
        analytical: when True, return the raw `evaluate` result instead of a
            one-row DataFrame.
        model_folder: directory searched for cached U/V/B numpy files.

    Returns:
        The raw evaluation result when `analytical` is True; otherwise a
        one-row DataFrame of hyper-parameters plus rounded metric pairs.
    """
    progress = WorkSplitter()

    columns = [
        'model', 'similarity', 'alpha', 'batch_size', 'corruption', 'epoch',
        'iteration', 'key_dimension', 'lambda', 'learning_rate',
        'mode_dimension', 'normalize', 'rank', 'root', 'topK'
    ]

    progress.section("\n".join(
        [":".join((str(k), str(params[k]))) for k in columns]))

    df = pd.DataFrame(columns=columns)

    # Reuse cached latent factors (U/V and optional bias B) when present.
    if os.path.isfile('{2}/U_{0}_{1}.npy'.format(params['model'],
                                                 params['rank'],
                                                 model_folder)):

        RQ = np.load('{2}/U_{0}_{1}.npy'.format(params['model'],
                                                params['rank'], model_folder))
        Y = np.load('{2}/V_{0}_{1}.npy'.format(params['model'], params['rank'],
                                               model_folder))

        if os.path.isfile('{2}/B_{0}_{1}.npy'.format(params['model'],
                                                     params['rank'],
                                                     model_folder)):
            Bias = np.load('{2}/B_{0}_{1}.npy'.format(params['model'],
                                                      params['rank'],
                                                      model_folder))
        else:
            Bias = None

    else:

        RQ, Yt, Bias = model(train,
                             embedded_matrix=np.empty((0)),
                             mode_dim=params['mode_dimension'],
                             key_dim=params['key_dimension'],
                             batch_size=params['batch_size'],
                             learning_rate=params['learning_rate'],
                             iteration=params['iteration'],
                             epoch=params['epoch'],
                             rank=params['rank'],
                             corruption=params['corruption'],
                             gpu_on=gpu_on,
                             lamb=params['lambda'],
                             alpha=params['alpha'],
                             root=params['root'])

        Y = Yt.T
        """
        np.save('{2}/U_{0}_{1}'.format(params['model'], params['rank'], model_folder), RQ)
        np.save('{2}/V_{0}_{1}'.format(params['model'], params['rank'], model_folder), Y)
        if Bias is not None:
            np.save('{2}/B_{0}_{1}'.format(params['model'], params['rank'], model_folder), Bias)
        """

    progress.subsection("Prediction")

    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         measure=measure,
                         bias=Bias,
                         topK=params['topK'][-1],
                         matrix_Train=train,
                         gpu=gpu_on)

    progress.subsection("Evaluation")

    result = evaluate(prediction,
                      test,
                      params['metric'],
                      params['topK'],
                      analytical=analytical)

    if analytical:
        return result
    else:
        # Copy params: the original assigned `result_dict = params`, which
        # silently injected metric keys into the caller's dict.
        result_dict = dict(params)

        for name in result.keys():
            result_dict[name] = [
                round(result[name][0], 4),
                round(result[name][1], 4)
            ]
        df = df.append(result_dict, ignore_index=True)

        return df
예제 #25
0
def main(args):
    """Train one causal/uniform-data model, save its latent factors, and
    report NLL/AUC on the validation split.

    Args:
        args: parsed CLI namespace; reads path, dataset, train, unif_train,
            valid, model, way, seed, batch_size, rank, lamb, lamb2, iter,
            gpu, confidence and step.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()

    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(train.shape))

    # Train Model: the selected model receives both the biased training
    # matrix and the uniformly-collected matrix (`unif_train`).
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)
    RQ, Y, uBias, iBias = models[args.model](train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
                                             iteration=args.iter, rank=args.rank, gpu_on=args.gpu, lam=args.lamb,
                                             lam2=args.lamb2, seed=args.seed, batch_size=args.batch_size, way=args.way,
                                             confidence=args.confidence, step=args.step)

    save_path = 'latent/' + args.dataset
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Persist latent factors; file names are prefixed with `way` when set so
    # variants of the same model/rank do not overwrite each other.
    if args.way is None:
        np.save(save_path + '/U_{0}_{1}'.format(args.model, args.rank), RQ)
        np.save(save_path + '/V_{0}_{1}'.format(args.model, args.rank), Y)
        if uBias is not None:
            np.save(save_path + '/uB_{0}_{1}'.format(args.model, args.rank), uBias)
            np.save(save_path + '/iB_{0}_{1}'.format(args.model, args.rank), iBias)
    else:
        np.save(save_path + '/' + args.way + '_U_{0}_{1}'.format(args.model, args.rank), RQ)
        np.save(save_path + '/' + args.way + '_V_{0}_{1}'.format(args.model, args.rank), Y)
        if uBias is not None:
            np.save(save_path + '/' + args.way + '_uB_{0}_{1}'.format(args.model, args.rank), uBias)
            np.save(save_path + '/' + args.way + '_iB_{0}_{1}'.format(args.model, args.rank), iBias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=valid, ubias=uBias, ibias=iBias, gpu=args.gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=args.gpu)

    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
예제 #26
0
def hyper_parameter_tuning(train,
                           validation,
                           params,
                           measure='Cosine',
                           gpu_on=True):
    """Grid-search rank (and, where the model supports them, alpha and root)
    for every model in `params`, evaluating each fit on `validation`.

    Models whose signature lacks 'alpha' or 'root' are run once with the
    neutral value 1 for that parameter.

    Args:
        train: user-item training matrix.
        validation: user-item validation matrix.
        params: dict with keys 'models', 'rank', 'alpha', 'root', 'lam',
            'iter', 'metric', 'topK'.
        measure: similarity measure forwarded to `predict`.
        gpu_on: run prediction on GPU when True.

    Returns:
        DataFrame with one row per evaluated combination.
    """
    progress = WorkSplitter()
    df = pd.DataFrame(columns=['model', 'rank', 'alpha', 'root', 'topK'])

    for algorithm in params['models']:

        for rank in params['rank']:
            # getfullargspec replaces getargspec, which was removed in
            # Python 3.11; index [0] is still the positional-arg name list.
            if 'alpha' in inspect.getfullargspec(params['models'][algorithm])[0]:
                alphas = params['alpha']
            else:
                alphas = [1]

            for alpha in alphas:

                if 'root' in inspect.getfullargspec(
                        params['models'][algorithm])[0]:
                    roots = params['root']
                else:
                    roots = [1]

                for root in roots:

                    progress.section(
                        "model: {0}, rank: {1}, root: {2}, alpha: {3}".format(
                            algorithm, rank, root, alpha))
                    # Honor the caller's gpu_on flag (was hardcoded to True).
                    RQ, Yt, Bias = params['models'][algorithm](
                        train,
                        embeded_matrix=np.empty((0)),
                        iteration=params['iter'],
                        rank=rank,
                        lam=params['lam'],
                        root=root,
                        alpha=alpha,
                        gpu_on=gpu_on)
                    Y = Yt.T

                    progress.subsection("Prediction")

                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y,
                                         measure=measure,
                                         bias=Bias,
                                         topK=params['topK'][-1],
                                         matrix_Train=train,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")

                    result = evaluate(prediction, validation, params['metric'],
                                      params['topK'])

                    result_dict = {
                        'model': algorithm,
                        'rank': rank,
                        'root': root,
                        'alpha': alpha
                    }

                    # Each metric is stored as a rounded pair
                    # [result[name][0], result[name][1]].
                    for name in result.keys():
                        result_dict[name] = [
                            round(result[name][0], 4),
                            round(result[name][1], 4)
                        ]

                    df = df.append(result_dict, ignore_index=True)
    return df
예제 #27
0
def main(args):
    """Train one latent-factor model in item- or user-based mode, predict
    top-k items, and optionally evaluate against a validation split.

    Args:
        args: parsed CLI namespace; reads data_dir, train_set, valid_set,
            validation, model, item, normalize, alpha, rank, mode_dim,
            key_dim, batch_size, optimizer, learning_rate, lamb, iteration,
            epoch, corruption, root, topk, shape, gpu, seed and sim_measure.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.data_dir))
    print("Train File Name: {0}".format(args.train_set))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid_set))
    print("Algorithm: {0}".format(args.model))
    if args.item == True:
        mode = "Item-based"
    else:
        mode = "User-based"
    print("Normalize: {0}".format(args.normalize))
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Mode Dimension: {0}".format(args.mode_dim))
    print("Key Dimension: {0}".format(args.key_dim))
    print("Batch Size: {0}".format(args.batch_size))
    print("Optimizer: {0}".format(args.optimizer))
    print("Learning Rate: {0}".format(args.learning_rate))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iteration))
    print("Epoch: {0}".format(args.epoch))
    print("Corruption: {0}".format(args.corruption))
    print("Root: {0}".format(args.root))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data: prefer the pre-built numpy matrix; fall back to CSV when an
    # explicit shape is supplied.
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.data_dir, name=args.train_set)
    else:
        # R_train = load_pandas(path=args.data_dir, name=args.train_set, shape=args.shape)
        R_train = load_csv(path=args.data_dir,
                           name=args.train_set,
                           shape=args.shape)

    print("Elapsed: {0}".format(inhour(time.time() - start_time)))

    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: user-based mode trains on the transposed
    # matrix and swaps the factor roles back afterwards.
    # NOTE(review): the user-based call omits epoch=args.epoch, unlike the
    # item-based branch -- looks unintentional; confirm against the model
    # signatures before changing.
    if args.item == True:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    # np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    # np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    # if Bias is not None:
    #     np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=args.topk,
                         matrix_Train=R_train,
                         measure=args.sim_measure,
                         gpu=args.gpu)
    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()

        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
예제 #28
0
def hyper_parameter_tuning(train, validation, params, unif_train, save_path,
                           seed, way, dataset, gpu_on):
    progress = WorkSplitter()

    table_path = 'tables/'
    data_name = save_path.split('/')[0]
    save_dir = 'tables/' + data_name + '/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    for algorithm in params['models']:
        if algorithm in ['AutoRec']:
            df = pd.DataFrame(
                columns=['model', 'rank', 'batch_size', 'lambda', 'iter'])
            for rank in params['rank']:
                for batch_size in params['batch_size']:
                    for lam in params['lambda']:
                        format = "model: {0}, rank: {1}, batch_size: {2}, lambda: {3}"
                        progress.section(
                            format.format(algorithm, rank, batch_size, lam))
                        RQ, X, xBias, Y, yBias = params['models'][algorithm](
                            train,
                            validation,
                            matrix_unif_train=unif_train,
                            iteration=params['iter'],
                            rank=rank,
                            gpu_on=gpu_on,
                            lam=lam,
                            seed=seed,
                            batch_size=batch_size,
                            way=way,
                            dataset=dataset)

                        progress.subsection("Prediction")
                        prediction = predict(matrix_U=RQ,
                                             matrix_V=Y.T,
                                             matrix_Valid=validation,
                                             bias=yBias,
                                             gpu=gpu_on)

                        progress.subsection("Evaluation")
                        result = evaluate(prediction,
                                          validation,
                                          params['metric'],
                                          gpu=gpu_on)
                        result_dict = {
                            'model': algorithm,
                            'rank': rank,
                            'batch_size': batch_size,
                            'lambda': lam,
                            'iter': params['iter']
                        }
                        for name in result.keys():
                            result_dict[name] = round(result[name][0], 8)
                        df = df.append(result_dict, ignore_index=True)
                        save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['InitFeatureEmbedAE', 'ConcatFeatureEmbedAE']:
            df = pd.DataFrame(
                columns=['model', 'batch_size', 'lambda', 'iter'])
            for batch_size in params['batch_size']:
                for lam in params['lambda']:
                    format = "model: {0}, batch_size: {1}, lambda: {2}"
                    progress.section(format.format(algorithm, batch_size, lam))
                    RQ, X, xBias, Y, yBias = params['models'][algorithm](
                        train,
                        validation,
                        matrix_unif_train=unif_train,
                        iteration=params['iter'],
                        rank=params['rank'],
                        gpu_on=gpu_on,
                        lam=lam,
                        seed=seed,
                        batch_size=batch_size,
                        way=way,
                        dataset=dataset)

                    progress.subsection("Prediction")
                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y.T,
                                         matrix_Valid=validation,
                                         bias=yBias,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")
                    result = evaluate(prediction,
                                      validation,
                                      params['metric'],
                                      gpu=gpu_on)
                    result_dict = {
                        'model': algorithm,
                        'batch_size': batch_size,
                        'lambda': lam,
                        'iter': params['iter']
                    }
                    for name in result.keys():
                        result_dict[name] = round(result[name][0], 8)
                    df = df.append(result_dict, ignore_index=True)
                    save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['UnionSampleAE', 'RefineLabelAE']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                format = "model: {0}, confidence: {1}"
                progress.section(format.format(algorithm, conf))
                RQ, X, xBias, Y, yBias = params['models'][algorithm](
                    train,
                    validation,
                    matrix_unif_train=unif_train,
                    iteration=params['iter'],
                    rank=params['rank'],
                    gpu_on=gpu_on,
                    lam=params['lambda'],
                    seed=seed,
                    batch_size=params['batch_size'],
                    way=way,
                    confidence=conf,
                    dataset=dataset)

                progress.subsection("Prediction")
                prediction = predict(matrix_U=RQ,
                                     matrix_V=Y.T,
                                     matrix_Valid=validation,
                                     bias=yBias,
                                     gpu=gpu_on)

                progress.subsection("Evaluation")
                result = evaluate(prediction,
                                  validation,
                                  params['metric'],
                                  gpu=gpu_on)
                result_dict = {
                    'model': algorithm,
                    'confidence': conf,
                    'iter': params['iter']
                }
                for name in result.keys():
                    result_dict[name] = round(result[name][0], 8)
                df = df.append(result_dict, ignore_index=True)
                save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['BatchSampleAE']:
            df = pd.DataFrame(columns=['model', 'step', 'iter'])
            for step in params['step']:
                format = "model: {0}, step: {1}"
                progress.section(format.format(algorithm, step))
                RQ, X, xBias, Y, yBias = params['models'][algorithm](
                    train,
                    validation,
                    matrix_unif_train=unif_train,
                    iteration=params['iter'],
                    rank=params['rank'],
                    gpu_on=gpu_on,
                    lam=params['lambda'],
                    seed=seed,
                    batch_size=params['batch_size'],
                    way=way,
                    step=step,
                    dataset=dataset)

                progress.subsection("Prediction")
                prediction = predict(matrix_U=RQ,
                                     matrix_V=Y.T,
                                     matrix_Valid=validation,
                                     bias=yBias,
                                     gpu=gpu_on)

                progress.subsection("Evaluation")
                result = evaluate(prediction,
                                  validation,
                                  params['metric'],
                                  gpu=gpu_on)
                result_dict = {
                    'model': algorithm,
                    'step': step,
                    'iter': params['iter']
                }
                for name in result.keys():
                    result_dict[name] = round(result[name][0], 8)
                df = df.append(result_dict, ignore_index=True)
                save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['BridgeLabelAE']:
            df = pd.DataFrame(columns=['model', 'lambda', 'lambda2', 'iter'])
            for lam in params['lambda']:
                for lam2 in params['lambda2']:
                    format = "model: {0}, lambda: {1}, lambda2: {2}"
                    progress.section(format.format(algorithm, lam, lam2))
                    RQ, X, xBias, Y, yBias = params['models'][algorithm](
                        train,
                        validation,
                        matrix_unif_train=unif_train,
                        iteration=params['iter'],
                        rank=params['rank'],
                        gpu_on=gpu_on,
                        lam=lam,
                        lam2=lam2,
                        seed=seed,
                        batch_size=params['batch_size'],
                        way=way,
                        dataset=dataset)

                    progress.subsection("Prediction")
                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y.T,
                                         matrix_Valid=validation,
                                         bias=yBias,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")
                    result = evaluate(prediction,
                                      validation,
                                      params['metric'],
                                      gpu=gpu_on)
                    result_dict = {
                        'model': algorithm,
                        'lambda': lam,
                        'lambda2': lam2,
                        'iter': params['iter']
                    }
                    for name in result.keys():
                        result_dict[name] = round(result[name][0], 8)
                    df = df.append(result_dict, ignore_index=True)
                    save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['SoftLabelAE']:
            df = pd.DataFrame(columns=['model', 'confidence', 'tau', 'iter'])
            for conf in params['confidence']:
                for tau in params['tau']:
                    format = "model: {0}, confidence: {1}, tau: {2}"
                    progress.section(format.format(algorithm, conf, tau))
                    RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = params[
                        'models'][algorithm](train,
                                             validation,
                                             matrix_unif_train=unif_train,
                                             iteration=params['iter'],
                                             rank=params['rank'],
                                             rank2=params['rank2'],
                                             gpu_on=gpu_on,
                                             lam=params['lambda'],
                                             seed=seed,
                                             batch_size=params['batch_size'],
                                             confidence=conf,
                                             tau=tau,
                                             dataset=dataset)

                    progress.subsection("Prediction")
                    prediction = predict(matrix_U=RQ,
                                         matrix_V=K.T,
                                         matrix_Valid=validation,
                                         bias=yBias,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")
                    result = evaluate(prediction,
                                      validation,
                                      params['metric'],
                                      gpu=gpu_on)
                    result_dict = {
                        'model': algorithm,
                        'confidence': conf,
                        'tau': tau,
                        'iter': params['iter']
                    }
                    for name in result.keys():
                        result_dict[name] = round(result[name][0], 8)
                    df = df.append(result_dict, ignore_index=True)
                    save_dataframe_csv(df, table_path, save_path)
        elif algorithm in ['HintAE']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                format = "model: {0}, confidence: {1}"
                progress.section(format.format(algorithm, conf))
                RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = params['models'][
                    algorithm](train,
                               validation,
                               matrix_unif_train=unif_train,
                               iteration=params['iter'],
                               rank=params['rank'],
                               rank2=params['rank2'],
                               gpu_on=gpu_on,
                               lam=params['lambda'],
                               seed=seed,
                               batch_size=params['batch_size'],
                               confidence=conf,
                               dataset=dataset)

                progress.subsection("Prediction")
                prediction = predict(matrix_U=RQ,
                                     matrix_V=K.T,
                                     matrix_Valid=validation,
                                     bias=yBias,
                                     gpu=gpu_on)

                progress.subsection("Evaluation")
                result = evaluate(prediction,
                                  validation,
                                  params['metric'],
                                  gpu=gpu_on)
                result_dict = {
                    'model': algorithm,
                    'confidence': conf,
                    'iter': params['iter']
                }
                for name in result.keys():
                    result_dict[name] = round(result[name][0], 8)
                df = df.append(result_dict, ignore_index=True)
                save_dataframe_csv(df, table_path, save_path)