Example #1
def find_best_model(config, n_user, n_item):
    best_model = None
    best_model_dir = None
    best_params = {}
    best_ndcg = 0
    for batch_size in map(int, config['MODEL']['batch_size'].split()):
        for lr in map(float, config['MODEL']['lr'].split()):
            for latent_dim in map(int, config['MODEL']['latent_dim'].split()):
                for l2_weight in map(float,
                                     config['MODEL']['l2_weight'].split()):
                    result_dir = "data/train_result/batch_size_{}-lr_{}-latent_dim_{}-l2_weight_{}-epoch_{}-n_negative_{}-top_k_{}".format(
                        batch_size, lr, latent_dim, l2_weight,
                        config['MODEL']['epoch'],
                        config['MODEL']['n_negative'],
                        config['EVALUATION']['top_k'])
                    with open(os.path.join(result_dir,
                                           'epoch_data.json')) as f:
                        ndcg = max([d['NDCG'] for d in json.load(f)])
                        if ndcg > best_ndcg:
                            best_ndcg = ndcg
                            best_params = {
                                'batch_size': batch_size,
                                'lr': lr,
                                'latent_dim': latent_dim,
                                'l2_weight': l2_weight
                            }
                            tf.reset_default_graph()
                            best_model = MF(n_user, n_item, lr, latent_dim,
                                            l2_weight)
                            best_model_dir = result_dir
    return best_model, best_model_dir, best_params
Example #2
def __init__(self, data_items):
    self.name = 'SVD_Explainable'
    self.data_items = data_items
    self.explanations_matrix = pd.read_csv(
        'explanation_matrix_user_based_weighted.csv')
    self.mf = MF(data_items.values,
                 K=50,
                 alpha=0.01,
                 beta=0.01,
                 lambda_=0.01,
                 iterations=100,
                 explainability_matrix=self.explanations_matrix)
    self.mf.train()
    self.predicted_matrix = pd.DataFrame(self.mf.full_matrix(),
                                         index=data_items.index,
                                         columns=data_items.columns)
    self.Q = self.mf.Q
Example #3
def find_best_model(config, n_user, n_item):
    best_model = None
    best_params = {}
    best_ndcg = 0
    for batch_size in map(int, config['MODEL']['batch_size'].split()):
        for lr in map(float, config['MODEL']['lr'].split()):
            for latent_dim in map(int, config['MODEL']['latent_dim'].split()):
                for l2_reg in map(float, config['MODEL']['l2_reg'].split()):
                    result_dir = "data/train_result/batch_size_{}-lr_{}-latent_dim_{}-l2_reg_{}-epoch_{}-n_negative_{}-top_k_{}".format(
                        batch_size, lr, latent_dim, l2_reg,
                        config['MODEL']['epoch'],
                        config['MODEL']['n_negative'],
                        config['EVALUATION']['top_k'])
                    with open(os.path.join(result_dir,
                                           'epoch_data.json')) as f:
                        ndcg = max([d['NDCG'] for d in json.load(f)])
                        if ndcg > best_ndcg:
                            best_ndcg = ndcg
                            best_params = {
                                'batch_size': batch_size,
                                'lr': lr,
                                'latent_dim': latent_dim,
                                'l2_reg': l2_reg
                            }
                            model = MF(n_user, n_item, latent_dim)
                            model.to('cuda:0')
                            model.load_state_dict(
                                torch.load(
                                    os.path.join(result_dir, 'model.pth')))
                            best_model = model
    return best_model, best_params
Example #4
def __init__(self, params, device='cuda:0'):
    # Note: all three 'tenet' branches currently instantiate the same
    # combined GNN+sequence model.
    if params.method == 'tenet' and 'gnn' in params.include_networks and 'seq' in params.include_networks:
        self.model = Tenet_Gnn_Seq(params, device)
    elif params.method == 'tenet' and 'gnn' in params.include_networks:
        self.model = Tenet_Gnn_Seq(params, device)
    elif params.method == 'tenet' and 'seq' in params.include_networks:
        self.model = Tenet_Gnn_Seq(params, device)
    elif params.method == 'bpr':
        self.model = BPR(params)
    elif params.method == 'mf':
        self.model = MF(params)
    elif params.method == 'gmf':
        self.model = GMF(params)
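A hypothetical caller for the dispatcher above; the class that owns this __init__ is not shown in the fragment, so the name ModelRunner and the params container below are illustrative assumptions, not the original API:

from types import SimpleNamespace

# Illustrative only: ModelRunner stands in for the unnamed class that owns
# the __init__ above, and the params fields mirror those it inspects.
params = SimpleNamespace(method='tenet', include_networks=['gnn', 'seq'])
runner = ModelRunner(params, device='cuda:0')  # hypothetical class name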
Example #5
def main():
    config = configparser.ConfigParser()
    config.read('MF_PyTorch/config.ini')

    data_splitter = data.DataSplitter()
    validation_data = data_splitter.make_evaluation_data('validation')
    test_data = data_splitter.make_evaluation_data('test')

    for batch_size in map(int, config['MODEL']['batch_size'].split()):
        for lr in map(float, config['MODEL']['lr'].split()):
            for latent_dim in map(int, config['MODEL']['latent_dim'].split()):
                for l2_reg in map(float, config['MODEL']['l2_reg'].split()):
                    print(
                        'batch_size = {}, lr = {}, latent_dim = {}, l2_reg = {}'
                        .format(batch_size, lr, latent_dim, l2_reg))
                    model = MF(data_splitter.n_user, data_splitter.n_item,
                               latent_dim)
                    model.to('cuda:0')

                    opt = optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=l2_reg)
                    criterion = nn.BCELoss()
                    epoch_data, best_model_state_dict = train(
                        model, opt, criterion, data_splitter, validation_data,
                        config)
                    save_train_result(best_model_state_dict, epoch_data,
                                      batch_size, lr, latent_dim, l2_reg,
                                      config)

    best_model, best_params = find_best_model(config, data_splitter.n_user,
                                              data_splitter.n_item)
    hit_ratio, ndcg = evaluation.evaluate(best_model, test_data,
                                          config.getint('EVALUATION', 'top_k'))
    print('---------------------------------\nBest result')
    print('batch_size = {}, lr = {}, latent_dim = {}, l2_reg = {}'.format(
        best_params['batch_size'], best_params['lr'],
        best_params['latent_dim'], best_params['l2_reg']))
    print('HR = {:.4f}, NDCG = {:.4f}'.format(hit_ratio, ndcg))
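The helpers train and save_train_result are not shown. Below is a minimal sketch of save_train_result, inferred from what find_best_model in Example #3 reads back (epoch_data.json and model.pth under the same directory naming scheme); treat it as an assumption, not the original implementation:

import json
import os

import torch


def save_train_result(best_model_state_dict, epoch_data, batch_size, lr,
                      latent_dim, l2_reg, config):
    # Mirror the directory naming scheme that find_best_model expects.
    result_dir = ("data/train_result/batch_size_{}-lr_{}-latent_dim_{}"
                  "-l2_reg_{}-epoch_{}-n_negative_{}-top_k_{}").format(
                      batch_size, lr, latent_dim, l2_reg,
                      config['MODEL']['epoch'], config['MODEL']['n_negative'],
                      config['EVALUATION']['top_k'])
    os.makedirs(result_dir, exist_ok=True)
    with open(os.path.join(result_dir, 'epoch_data.json'), 'w') as f:
        json.dump(epoch_data, f)
    torch.save(best_model_state_dict, os.path.join(result_dir, 'model.pth'))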
Example #6
def main():
    config = configparser.ConfigParser()
    config.read('MF_TensorFlow/config.ini')

    data_splitter = data.DataSplitter()
    validation_data = data_splitter.make_evaluation_data('validation')
    test_data = data_splitter.make_evaluation_data('test')

    for batch_size in map(int, config['MODEL']['batch_size'].split()):
        for lr in map(float, config['MODEL']['lr'].split()):
            for latent_dim in map(int, config['MODEL']['latent_dim'].split()):
                for l2_weight in map(float,
                                     config['MODEL']['l2_weight'].split()):
                    print(
                        'batch_size = {}, lr = {}, latent_dim = {}, l2_weight = {}'
                        .format(batch_size, lr, latent_dim, l2_weight))
                    result_dir = "data/train_result/batch_size_{}-lr_{}-latent_dim_{}-l2_weight_{}-epoch_{}-n_negative_{}-top_k_{}".format(
                        batch_size, lr, latent_dim, l2_weight,
                        config['MODEL']['epoch'],
                        config['MODEL']['n_negative'],
                        config['EVALUATION']['top_k'])
                    os.makedirs(result_dir, exist_ok=True)
                    tf.reset_default_graph()
                    model = MF(data_splitter.n_user, data_splitter.n_item, lr,
                               latent_dim, l2_weight)
                    epoch_data = train(result_dir, model, data_splitter,
                                       validation_data, batch_size, config)
                    save_train_result(result_dir, epoch_data)

    best_model, best_model_dir, best_params = find_best_model(
        config, data_splitter.n_user, data_splitter.n_item)
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, os.path.join(best_model_dir, 'model'))
        hit_ratio, ndcg = evaluation.evaluate(
            best_model, sess, test_data, config.getint('EVALUATION', 'top_k'))
        print('---------------------------------\nBest result')
        print(
            'batch_size = {}, lr = {}, latent_dim = {}, l2_weight = {}'.format(
                best_params['batch_size'], best_params['lr'],
                best_params['latent_dim'], best_params['l2_weight']))
        print('HR = {:.4f}, NDCG = {:.4f}'.format(hit_ratio, ndcg))
Example #7
rate_train = ratings_base.to_numpy()  # as_matrix() was removed from pandas
rate_test = ratings_test.to_numpy()

# indices start from 0 for user_id and item_id
rate_train[:, :2] -= 1
rate_test[:, :2] -= 1

# user-based

# rs = MF(rate_train, K = 10, lam = .1, print_every = 10,
#     learning_rate = 0.75, max_iter = 100, user_based = 1)
# rs.fit()
# # evaluate on test data
# RMSE = rs.evaluate_RMSE(rate_test)
# print ('\nUser-based MF, RMSE =', RMSE)

# item-based

rs = MF(rate_train,
        K=10,
        lam=.1,
        print_every=10,
        learning_rate=0.75,
        max_iter=100,
        user_based=0)
rs.fit()
# evaluate on test data
RMSE = rs.evaluate_RMSE(rate_test)
print('\nItem-based MF, RMSE =', RMSE)
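For reference, a minimal sketch of what evaluate_RMSE computes; it assumes the MF class exposes a pred(u, i) method returning a single predicted rating, which is not shown in this snippet:

import numpy as np


def evaluate_rmse(rs, rate_test):
    # Each test row is (user_id, item_id, rating[, timestamp]).
    errors = [rs.pred(int(u), int(i)) - r for u, i, r, *_ in rate_test]
    return np.sqrt(np.mean(np.square(errors)))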
Example #8
class SVD_Explainable(Strategy):
    def __init__(self, data_items):
        self.name = 'SVD_Explainable'
        self.data_items = data_items
        self.explanations_matrix = pd.read_csv(
            'explanation_matrix_user_based_weighted.csv')
        self.mf = MF(data_items.values,
                     K=50,
                     alpha=0.01,
                     beta=0.01,
                     lambda_=0.01,
                     iterations=100,
                     explainability_matrix=self.explanations_matrix)
        self.mf.train()
        self.predicted_matrix = pd.DataFrame(self.mf.full_matrix(),
                                             index=data_items.index,
                                             columns=data_items.columns)
        self.Q = self.mf.Q

    def get_users_of_project(self, project):
        users_of_project = self.data_items[project]
        users_of_project = users_of_project[users_of_project > 0].index.values
        return users_of_project

    def get_user_projects(self, user_id):
        known_user_likes = self.data_items.loc[user_id]
        known_user_likes = known_user_likes[known_user_likes > 0].index.values
        return known_user_likes

    def calc_explanation_score_user_based(self, user_id, project,
                                          cf_user_user):
        k = 50
        similar_users = cf_user_user.find_k_similar_users(user_id, k=k).index
        user_liked_project = self.get_users_of_project(project)
        return len(np.intersect1d(similar_users,
                                  user_liked_project)) / len(similar_users)

    def calc_explanation_score_item_based(self, user_id, project,
                                          cf_item_item):
        k = 10
        similar_projects = cf_item_item.get_k_similar_projects(project, k=k)
        known_user_projects = self.get_user_projects(user_id)
        return len(np.intersect1d(similar_projects,
                                  known_user_projects)) / len(similar_projects)

    def get_explanations_matrix(self):
        #cf_item_item = CFItemItem(self.data_items)
        cf_user_user = CFUserUser(self.data_items)
        explanation_matrix = pd.DataFrame(0,
                                          columns=self.data_items.columns,
                                          index=self.data_items.index)
        print(explanation_matrix.shape)
        for i, user_id in enumerate(explanation_matrix.index):
            print(i)  # progress indicator
            for project in explanation_matrix.columns:
                # .loc[row, col] assignment avoids chained-indexing writes
                # that may silently modify a copy.
                explanation_matrix.loc[
                    user_id,
                    project] = self.calc_explanation_score_user_based(
                        user_id, project, cf_user_user)
        return explanation_matrix

    def get_recommendations(self, user_index, known_user_projects, k,
                            ip_address):
        qt_df = pd.DataFrame(self.Q.T, columns=self.data_items.columns)
        # Q.T.transpose() is simply Q, so use it directly.
        projects_predicted_ratings = \
            [[i, np.dot(np.dot(self.data_items.loc[user_index], self.Q), qt_df[i])]
             for i in self.data_items.columns
             if i not in known_user_projects]

        # projects_predicted_ratings = \
        #     [[project, self.predicted_matrix.loc[user_index][project]]
        #      for project in self.data_items.columns
        #      if project not in known_user_projects]
        projects_predicted_ratings = sorted(projects_predicted_ratings,
                                            key=lambda i: i[1],
                                            reverse=True)
        self.projects_predicted_ratings = projects_predicted_ratings
        self.user = user_index
        projects_predicted_ratings = [i[0] for i in projects_predicted_ratings]
        projects_predicted_ratings = self.remove_non_active_projects(
            projects_predicted_ratings)
        # projects_predicted_ratings = self.remove_unreachable_projects(projects_predicted_ratings, ip_address)
        return projects_predicted_ratings[:k]

    @staticmethod
    def remove_non_active_projects(recommended_projects):
        from Recommender import non_active_projects
        return [
            project for project in recommended_projects
            if project not in non_active_projects['project'].values
        ]

    @staticmethod
    def remove_unreachable_projects(recommended_projects, ip_address):
        user_loc = get_user_loc(ip_address)
        return [
            project for project in recommended_projects
            if is_project_reachable_to_user(user_loc, project)
        ]

    def get_highest_online_project(self):
        from Recommender import is_online_project, recommend_default_online
        online_similar_projects = list(
            filter(lambda x: is_online_project(x[0]),
                   self.projects_predicted_ratings))
        if len(online_similar_projects) == 0:
            return recommend_default_online(self.user)
        return online_similar_projects[0][0]
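A hypothetical usage sketch for the strategy above, assuming data_items is a user-by-project interaction DataFrame as the constructor implies:

# Illustrative usage; data_items is an assumed user-by-project DataFrame.
strategy = SVD_Explainable(data_items)
user_id = data_items.index[0]
known = strategy.get_user_projects(user_id)
top_5 = strategy.get_recommendations(user_id, known, k=5, ip_address=None)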
Example #9
import pandas as pd

# Note: the MF class used below appears to build a fuzzy membership function
# (labels 'EL', 'VL', 'L'), not matrix factorization; it is imported
# elsewhere in the original script.
interpolate_mod_trading_df = pd.read_csv("interpolate_mod_trading.csv",
                                         index_col=0)
interpolate_mod_trading_df.index = pd.to_datetime(
    interpolate_mod_trading_df.index)

interpolate_mod_trading_df['SMA_60'] = interpolate_mod_trading_df[
    'High'].rolling(window=60).mean()
interpolate_mod_trading_df['SMA_15'] = interpolate_mod_trading_df[
    'High'].rolling(window=15).mean()
interpolate_mod_trading_df['MA_diff'] = interpolate_mod_trading_df[
    'SMA_15'] - interpolate_mod_trading_df['SMA_60']

sorted_diff = interpolate_mod_trading_df['2012-01-03':'2013-01-04'][
    'MA_diff'].sort_values(ascending=True).dropna()
# sorted_diff = interpolate_mod_trading_df['MA_diff'].sort_values(ascending=True).dropna()

sorted_diff.reset_index(inplace=True, drop=True)

mf_instance = MF(sorted_diff)
mf_function = mf_instance.get_mf()

extent = 'EL'
diff = [-20]
value = mf_function(extent, diff)
value1 = mf_function('VL', diff)
value2 = mf_function('L', diff)

value3 = mf_function('VL', [-20, -40])

Example #10
from MF import MF
import numpy as np
import pandas as pd

# Read the ratings file:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']

ratings = pd.read_csv('ml-100k/u.data',
                      sep='\t',
                      names=r_cols,
                      encoding='latin-1')
R = np.array(
    ratings.pivot(index='user_id', columns='movie_id',
                  values='rating').fillna(0))
mf = MF(R, K=20, alpha=0.001, beta=0.01, iterations=100)
training_process = mf.train()
print()
print("P x Q:")
print(mf.full_matrix())
print()
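As a small follow-up (not part of the original snippet), the reconstructed matrix can be used to rank unseen movies for one user, assuming full_matrix() returns a user-by-movie array aligned with R:

predicted = mf.full_matrix()
user = 0  # row index into R / predicted
unseen = np.where(R[user] == 0)[0]  # columns with no observed rating
top10 = unseen[np.argsort(predicted[user][unseen])[::-1][:10]]
print("Top-10 unseen movie columns for user 0:", top10)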
Example #11
import numpy as np

# Fragment: rating_matrix, items, r, ret and Update_XY are assumed to be
# defined earlier in the original script.
init_alpha = 10e-6
init_beta = 10e-6

# init
## store cell information (whether each cell is valid or null)
S_ = rating_matrix
Sbin = (S_ > 0).astype('int')  # 0/1 mask marking the observed (non-zero) cells

## scale S
S = (S_ - 1) / np.max(S_)  # S ~ [0, 1]; assumes the minimum rating is 1.
S = 2 * r * S - r  # S ~ [-r, r]

## initialize B, D, X, Y
m, n = S.shape
B, D = MF(S, r, Sbin, maxsteps=200, alpha=init_alpha, beta=init_beta)

ret['rating_matrix'] = rating_matrix
ret['items'] = items
ret['S'] = S
ret['r'] = r
ret['Sbin'] = Sbin
ret['B_MF'] = B
ret['D_MF'] = D

B = np.sign(B)  # r x m: user codes
D = np.sign(D)  # r x n: item codes
X = Update_XY(B, r)
Y = Update_XY(D, r)

ret['B'] = B