예제 #1
0
    def __init__(self, sess, num_neg = 0, top_K = [5,10], num_ranking_neg = 0,
                 gmf_num_factors=16, gmf_regs_emb=[0,0],
                 mlp_num_factors=16, mlp_layers=[20,10], mlp_regs_emb=[0,0], mlp_regs_layer=[0,0],
                 lr=0.001,
                 epochs=100, batch_size=128, T=10**3, verbose=False):
        '''Store the constructor arguments on the instance and report them.

        Args:
            sess: Stored as ``self.session`` (presumably a TensorFlow
                session -- confirm against the caller).
            num_neg: Stored as ``self.num_neg``.
            top_K: Stored as ``self.topk``; a list argument is copied.
            num_ranking_neg: Stored as ``self.num_ranking_neg``.
            gmf_num_factors: Stored as ``self.gmf_num_factors``.
            gmf_regs_emb: Two-element sequence unpacked into
                ``self.gmf_regs_user`` and ``self.gmf_regs_item``.
            mlp_num_factors: Stored as ``self.mlp_num_factors``.
            mlp_layers: Stored as ``self.mlp_layers``; a list argument is
                copied.
            mlp_regs_emb: Two-element sequence unpacked into
                ``self.mlp_regs_user`` and ``self.mlp_regs_item``.
            mlp_regs_layer: Stored as ``self.mlp_regs_layer``; a list
                argument is copied.
            lr: Stored as ``self.lr``.
            epochs: Stored as ``self.epochs``.
            batch_size: Stored as ``self.batch_size``.
            T: Stored as ``self.skip_step``.
            verbose: Stored as ``self.verbose``.

        Raises:
            ValueError / TypeError: If ``gmf_regs_emb`` or ``mlp_regs_emb``
                cannot be unpacked into exactly two values.
        '''
        # NOTE: the list defaults in the signature are created once and shared
        # by every call.  Lists kept on the instance are therefore copied, so
        # an in-place change on one model cannot leak into the defaults (or
        # into the caller's list).  No local variables are introduced here
        # because the frame is handed to gtl.print_paras below, which
        # presumably reports what it finds in it -- TODO confirm.
        self.session = sess
        self.num_neg = num_neg
        self.topk = list(top_K) if isinstance(top_K, list) else top_K
        self.num_ranking_neg = num_ranking_neg

        self.gmf_num_factors = gmf_num_factors
        # Unpacking yields the individual elements, so no copy is needed.
        self.gmf_regs_user, self.gmf_regs_item = gmf_regs_emb

        self.mlp_num_factors = mlp_num_factors
        self.mlp_layers = (list(mlp_layers)
                           if isinstance(mlp_layers, list) else mlp_layers)
        self.mlp_regs_layer = (list(mlp_regs_layer)
                               if isinstance(mlp_regs_layer, list)
                               else mlp_regs_layer)
        self.mlp_regs_user, self.mlp_regs_item = mlp_regs_emb

        self.lr = lr

        self.epochs = epochs
        self.batch_size = batch_size
        self.skip_step = T
        self.verbose = verbose
        gtl.print_paras(inspect.currentframe())
예제 #2
0
파일: BPRMF.py 프로젝트: bennetyf/RecSys
    def __init__(self,
                 sess,
                 top_K=10,
                 num_ranking_neg=0,
                 num_factors=32,
                 regs_ui=[0, 0],
                 lr=0.001,
                 epochs=100,
                 batch_size=128,
                 T=10**3,
                 verbose=False):
        """Record the constructor arguments on the instance and report them.

        ``regs_ui`` must unpack into exactly two values, which become
        ``self.regs_user`` and ``self.regs_item``.  ``T`` is kept as
        ``self.skip_step`` and ``top_K`` as ``self.topK``.  ``self.num_neg``
        is always set to 1 and is not configurable through the signature.
        """
        # Session handle plus evaluation settings.
        self.session, self.topK = sess, top_K
        self.num_ranking_neg = num_ranking_neg

        # Model size; the number of negatives is fixed at one.
        self.num_factors, self.num_neg = num_factors, 1

        # Separate regularisation weights for the two entries of regs_ui.
        self.regs_user, self.regs_item = regs_ui

        # Optimisation and logging settings.
        self.lr, self.epochs, self.batch_size = lr, epochs, batch_size
        self.skip_step, self.verbose = T, verbose

        # The current frame is passed on so the helper can report the
        # arguments; no extra locals are created in this method.
        gtl.print_paras(inspect.currentframe())
예제 #3
0
파일: GMF.py 프로젝트: bennetyf/RecSys
    def train_one_epoch(self, epoch):
        """Run one training epoch and optionally print running averages.

        The training user ids, item ids and labels are passed through
        ``gtl.shuffle_list`` and then consumed in ``self.num_batch``
        consecutive windows of ``self.batch_size`` elements.  Each window is
        fed to ``self.session.run`` together with the ``self.opt``,
        ``self.loss``, ``self.mae`` and ``self.rms`` fetches.

        Args:
            epoch: Value used only to label the printed progress lines.

        Returns:
            None.  When ``self.verbose`` is true, progress is printed every
            ``self.skip_step`` batches and once at the end of the epoch.
        """
        uid, iid, lb = gtl.shuffle_list(self.train_uid, self.train_iid,
                                        self.train_labels)

        n_batches, total_loss, total_mae, total_rms = 0, 0, 0, 0
        for i in range(self.num_batch):
            # One slice object keeps the three inputs aligned.
            window = slice(i * self.batch_size, (i + 1) * self.batch_size)

            _, l, mae, rms = self.session.run(
                [self.opt, self.loss, self.mae, self.rms],
                feed_dict={
                    self.uid: uid[window],
                    self.iid: iid[window],
                    self.labels: lb[window]
                })
            n_batches += 1
            total_loss += l
            total_mae += mae
            total_rms += rms

            if self.verbose and n_batches % self.skip_step == 0:
                print(
                    "Epoch {0} Batch {1}: [Loss] = {2} [MAE] = {3}".format(
                        epoch, n_batches, total_loss / n_batches,
                        total_mae / n_batches))

        # With zero batches there is nothing to average; skipping the summary
        # avoids a ZeroDivisionError that only verbose mode would trigger.
        if self.verbose and n_batches:
            print("Epoch {0}: [Loss] {1}".format(epoch,
                                                 total_loss / n_batches))
            print("Epoch {0}: [MAE] {1} and [RMS] {2}".format(
                epoch, total_mae / n_batches, total_rms / n_batches))
예제 #4
0
파일: BPRMF.py 프로젝트: bennetyf/RecSys
    def train_one_epoch(self, epoch):
        """Run one training epoch with one sampled negative item per row.

        The training user ids, item ids and labels are passed through
        ``gtl.shuffle_list``.  For every shuffled user id a negative item is
        drawn with ``random.choice`` from ``self.neg_dict[user]``.  The data
        is then consumed in ``self.num_batch`` consecutive windows of
        ``self.batch_size`` elements, each fed to ``self.session.run`` with
        the ``self.opt`` and ``self.loss`` fetches.

        Args:
            epoch: Value used only to label the printed progress lines.

        Returns:
            None.  When ``self.verbose`` is true, the running mean loss is
            printed every ``self.skip_step`` batches and once at the end.

        Raises:
            KeyError: If a user id is missing from ``self.neg_dict``.
            IndexError: If a user's negative-item list is empty.
        """
        # The labels are still passed to shuffle_list so the call is exactly
        # the one made before, but the shuffled labels are not fed to the
        # graph and are therefore discarded.
        uid, iid, _ = gtl.shuffle_list(self.train_uid, self.train_iid,
                                       self.train_labels)
        # Negatives are drawn once per epoch for all rows; the original
        # author noted that per-user np.random.choice was very slow.
        neg_iid = [random.choice(self.neg_dict[u]) for u in uid]

        n_batches, total_loss = 0, 0
        for i in range(self.num_batch):
            # One slice object keeps the three inputs aligned.
            window = slice(i * self.batch_size, (i + 1) * self.batch_size)

            _, l = self.session.run(
                [self.opt, self.loss],
                feed_dict={
                    self.uid: uid[window],
                    self.iid: iid[window],
                    self.neg_iid: neg_iid[window]
                })
            n_batches += 1
            total_loss += l

            if self.verbose and n_batches % self.skip_step == 0:
                print("Epoch {0} Batch {1}: [Loss] = {2}".format(
                    epoch, n_batches, total_loss / n_batches))

        # With zero batches there is nothing to average; skipping the summary
        # avoids a ZeroDivisionError that only verbose mode would trigger.
        if self.verbose and n_batches:
            print("Epoch {0}: [Loss] {1}".format(epoch,
                                                 total_loss / n_batches))
예제 #5
0
파일: Adv_MF.py 프로젝트: bennetyf/RecSys
    def __init__(self,
                 sess,
                 top_K,
                 num_factors=32,
                 reg=0.0,
                 reg_adv=0.0,
                 noise_type='random',
                 is_adv=False,
                 eps=0.5,
                 lr=0.001,
                 is_prec=False,
                 save_T=50,
                 epochs=100,
                 batch_size=128,
                 T=10**3,
                 verbose=False):
        """Record the constructor arguments on the instance and report them.

        Every argument is stored under an attribute of the same name, except
        ``sess`` (``self.session``), ``top_K`` (``self.topK``) and ``T``
        (``self.skip_step``).  Two empty lists, ``self.metric1`` and
        ``self.metric2``, are also created.
        """
        self.session = sess

        # Model and regularisation settings.
        self.num_factors, self.reg, self.reg_adv = num_factors, reg, reg_adv
        self.noise_type, self.is_adv = noise_type, is_adv
        self.eps, self.topK = eps, top_K

        # Optimisation, evaluation and checkpoint settings.
        self.lr, self.is_prec = lr, is_prec
        self.save_T = save_T

        # Training-loop and logging settings.
        self.epochs, self.batch_size = epochs, batch_size
        self.skip_step, self.verbose = T, verbose

        # Two independent, initially empty record lists.
        self.metric1 = []
        self.metric2 = []

        # The current frame is passed on so the helper can report the
        # arguments; no extra locals are created in this method.
        gtl.print_paras(inspect.currentframe())
예제 #6
0
파일: Adv_MF.py 프로젝트: bennetyf/RecSys
    def train_one_epoch(self, epoch):
        """Run one training epoch over (user, positive item, negative item) rows.

        The training user and item ids are passed through
        ``gtl.shuffle_list``.  For every shuffled user id a negative item is
        drawn with ``random.choice`` from ``self.neg_dict[user]``.  The data
        is consumed in ``self.num_batch`` batches of ``self.batch_size``
        rows; the final batch extends to the end of the data.  When
        ``self.is_adv`` is true, ``self.update_P`` and ``self.update_Q`` are
        run on the batch before the optimiser step.

        Args:
            epoch: Value used only to label the printed progress lines.

        Returns:
            None.  When ``self.verbose`` is true, the running mean loss is
            printed every ``self.skip_step`` batches and once at the end.

        Raises:
            KeyError: If a user id is missing from ``self.neg_dict``.
            IndexError: If a user's negative-item list is empty.
        """
        uid, iid = gtl.shuffle_list(self.train_uid, self.train_iid)

        # Negatives are drawn once per epoch, one per shuffled row.
        iid_neg = [random.choice(self.neg_dict[u]) for u in uid]

        last_batch = self.num_batch - 1
        n_batches, total_loss = 0, 0
        for i in range(self.num_batch):
            start = i * self.batch_size
            # An open-ended slice on the final batch picks up every remaining
            # row, exactly as the dedicated last-batch branch used to do.
            stop = None if i == last_batch else start + self.batch_size
            window = slice(start, stop)

            feed_dict = {
                self.uid: uid[window],
                self.pos_iid: iid[window],
                self.neg_iid: iid_neg[window],
            }

            if self.is_adv:
                self.session.run([self.update_P, self.update_Q], feed_dict)
            _, l = self.session.run([self.optimizer, self.loss], feed_dict)

            n_batches += 1
            total_loss += l

            if self.verbose and n_batches % self.skip_step == 0:
                print("[All] Training Epoch {0} Batch {1}: [Loss] = {2}".
                      format(epoch, n_batches, total_loss / n_batches))

        # With zero batches there is nothing to average; skipping the summary
        # avoids a ZeroDivisionError that only verbose mode would trigger.
        if self.verbose and n_batches:
            print("[Epoch Average] Training Epoch {0}: [Loss] {1}".format(
                epoch, total_loss / n_batches))
예제 #7
0
    def train_one_epoch(self, epoch):
        """Run one training epoch over two parallel (user, item, label) sets.

        The six training sequences (ids and labels for set 1 and set 2) are
        passed through ``gtl.shuffle_list`` in a single call and then
        consumed in ``self.num_batch`` consecutive windows of
        ``self.batch_size`` elements.  Each window is fed to
        ``self.session.run`` with the ``self.opt``, ``self.loss``,
        ``self.mae1`` and ``self.mae2`` fetches.

        Args:
            epoch: Value used only to label the printed progress lines.

        Returns:
            None.  Intermediate progress is printed every ``self.skip_step``
            batches when ``self.verbose`` is true.  The end-of-epoch summary
            is printed regardless of ``self.verbose``, provided at least one
            batch ran.
        """
        uid1, iid1, lb1, uid2, iid2, lb2 = gtl.shuffle_list(
            self.train_uid1, self.train_iid1, self.train_labels1,
            self.train_uid2, self.train_iid2, self.train_labels2)

        n_batches = 0
        total_loss = 0
        total_mae1 = 0
        total_mae2 = 0
        for i in range(self.num_batch):
            # One slice object keeps all six inputs aligned.
            window = slice(i * self.batch_size, (i + 1) * self.batch_size)

            _, l, mae1, mae2 = self.session.run(
                [self.opt, self.loss, self.mae1, self.mae2],
                feed_dict={
                    self.dom1_uid: uid1[window],
                    self.dom2_uid: uid2[window],
                    self.dom1_iid: iid1[window],
                    self.dom2_iid: iid2[window],
                    self.dom1_labels: lb1[window],
                    self.dom2_labels: lb2[window]
                })
            n_batches += 1
            total_loss += l
            total_mae1 += mae1
            total_mae2 += mae2

            if self.verbose and n_batches % self.skip_step == 0:
                print(
                    "Epoch {0} Batch {1}: [Loss] = {2} [MAE] = {3}".format(
                        epoch, n_batches, total_loss / n_batches,
                        (total_mae1 + total_mae2) / (2 * n_batches)))

        # With zero batches there is nothing to average; skipping the summary
        # avoids a ZeroDivisionError.
        if n_batches:
            print("Epoch {0}: [Loss] {1}".format(epoch,
                                                 total_loss / n_batches))
            print("Epoch {0}: [MAE] {1} and {2}".format(
                epoch, total_mae1 / n_batches, total_mae2 / n_batches))
예제 #8
0
    num_epochs, batch_size, lr,\
    regs_ui, regs_bias, num_factors = \
    args.epochs, args.batch_size, args.lr,\
    args.regs_ui, args.regs_bias,args.nfactors

    regs_ui = list(np.float32(eval(regs_ui)))
    regs_bias = list(np.float32(eval(regs_bias)))

    # original_matrix = mtl.load_original_matrix('Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::')

    # train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.1, seed=42)
    # gtl.matrix_to_mat('SVD_ML1M_90.mat',opt='coo', original=original_matrix, train=train_matrix, test=test_matrix)
    # gtl.matrix_to_excel('svd_all.xlsx',opt='coo',train_all=train_matrix, test_all=test_matrix)
    # print("Saved!")

    data = gtl.load_mat_as_matrix('Data/SVD_ML1M_90.mat', opt='coo')
    original_matrix, train_matrix, test_matrix = data['original'], data[
        'train'], data['test']

    gpu_options = tf.GPUOptions(allow_growth=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=8,
                                          inter_op_parallelism_threads=8,
                                          gpu_options=gpu_options)) as sess:
        model = SVD(sess,
                    num_factors=num_factors,
                    regs_ui=regs_ui,
                    regs_bias=regs_bias,
                    lr=lr,
                    epochs=num_epochs,
예제 #9
0
    args.topk, args.ranking_ratio, args.nfactors, args.out_neg_ratio, args.epsilon,\
    args.ae_regs

    ae_regs = list(np.float32(eval(ae_regs)))

    # original_matrix \
    #     = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::')

    # train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.1, seed=10)

    # original_matrix = mtl.matrix_to_binary(original_matrix, 0)
    # train_matrix, test_matrix = mtl.matrix_to_binary(train_matrix,0), mtl.matrix_to_binary(test_matrix,0)

    # gtl.matrix_to_mat('Data/ML1M_90_Data.mat', opt='coo', original=original_matrix, train=train_matrix, test=test_matrix)
    #
    data = gtl.load_mat_as_matrix('Data/ML1M_90_Data.mat', opt='coo')
    original_matrix, train_matrix, test_matrix = data['original'], data['train'], data['test']

    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=8,
                                          inter_op_parallelism_threads=8,
                                          gpu_options=gpu_options)) as sess:
        model = GAN_AE(sess,
                     top_K=topK, ranking_list_ratio=ranking_list_ratio, neg_ratio=neg_ratio,
                     num_factors=num_factors, ae_regs=ae_regs, eps=epsilon,
                     lr=lr,
                     epochs=num_epochs, batch_size=batch_size, T=1000, verbose=True)

        model.prepare_data(original_matrix=original_matrix, train_matrix=train_matrix, test_matrix=test_matrix)
        model.build_model()
예제 #10
0
             'flixster-5':      'Data/flixster-hr-5.mat',
             'ymov-full':       'Data/ymov-hr-full.mat',
             'ymov-345':        'Data/ymov-hr-345.mat',
             'ymov-45':         'Data/ymov-hr-45.mat',
             'ymov-5':          'Data/ymov-hr-5.mat',
             'ymus-full':       'Data/ymus-hr-full.mat',
             'ymus-345':        'Data/ymus-hr-345.mat',
             'ymus-45':         'Data/ymus-hr-45.mat',
             'ymus-5':          'Data/ymus-hr-5.mat',
}

dataset = 'filmtrust-34'
path = path_dict[dataset]

print('Loading Data From {0}'.format(path))
data = gtl.load_mat_as_matrix(path, opt='coo')
original_matrix, train_matrix, test_matrix = data['original'], data['train'], data['test']
print('Users:{0}, Items:{1}, Ratings:{2}'.format(original_matrix.shape[0], original_matrix.shape[1], original_matrix.nnz))

gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=24,
                                          inter_op_parallelism_threads=24,
                                          gpu_options=gpu_options)) as sess:
    model = AMF(sess,
                top_K=[5,10],
                num_factors=40,
                reg=0.015,

                reg_adv=100,
                noise_type='grad',
예제 #11
0
    'ymov-45': 'Data/ymov-hr-45.mat',
    'ymov-5': 'Data/ymov-hr-5.mat',
    'ymus-full': 'Data/ymus-hr-full.mat',
    'ymus-345': 'Data/ymus-hr-345.mat',
    'ymus-45': 'Data/ymus-hr-45.mat',
    'ymus-5': 'Data/ymus-hr-5.mat',
}

dataset = 'ciao-45'
path = path_dict[dataset]

date = '20180801'
filename = 'itempop.mat'

print('Loading Data From {0}'.format(path))
data = gtl.load_mat_as_matrix(path, opt='coo')
original_matrix, train_matrix, test_matrix = data['original'], data[
    'train'], data['test']
print('Users:{0}, Items:{1}, Ratings:{2}'.format(original_matrix.shape[0],
                                                 original_matrix.shape[1],
                                                 original_matrix.nnz))

num_user, num_item = original_matrix.shape[0], original_matrix.shape[1]

topK = [5, 10]
total_hr, total_ndcg = np.zeros(len(topK)), np.zeros(len(topK))

tr_mat = train_matrix.transpose()
item_pop_dict = {}
item_pop_list = []
for item, ratings in enumerate(tr_mat.data):
예제 #12
0
파일: Adv_AE.py 프로젝트: bennetyf/RecSys
    def __init__(self,
                 sess,
                 top_K,
                 num_factors=32,
                 ae_regs=[0.0, 0.0, 0.0, 0.0],
                 user_node_reg=0.0,
                 eps=0.5,
                 num_noise_factor=64,
                 drop_out_rate=0.0,
                 lr=0.001,
                 is_user_node=False,
                 noise_pos='W2',
                 noise_type='random',
                 robust_test=False,
                 adv_training=False,
                 noise_loss_ratio=0.0,
                 noise_loss_ratio_W1=0.0,
                 org_loss_ratio=0.0,
                 is_prec=False,
                 save_T=10,
                 epochs=100,
                 batch_size=128,
                 T=10**3,
                 verbose=False):
        """Store the constructor arguments, check them and report them.

        Every argument is stored under an attribute of the same name, except
        ``sess`` (``self.session``), ``top_K`` (``self.topK``),
        ``user_node_reg`` (``self.user_node_regs``), ``drop_out_rate``
        (``self.dropout_rate``) and ``T`` (``self.skip_step``).  A list
        passed as ``ae_regs`` is copied.  Two empty lists, ``self.metric1``
        and ``self.metric2``, are also created.

        Raises:
            AssertionError: If ``noise_pos`` is ``'USER'`` while
                ``is_user_node`` is false, or if ``robust_test`` and
                ``adv_training`` are both true.  These checks are ``assert``
                statements and are skipped when Python runs with ``-O``.
        """
        # No local variables are introduced in this method because the frame
        # is handed to gtl.print_paras below, which presumably reports what
        # it finds in it -- TODO confirm.
        self.session = sess

        self.num_factors = num_factors
        self.num_noise_factor = num_noise_factor
        # The default list is created once and shared by every call; copying
        # stops an in-place change on one model from leaking into the default
        # (or into the caller's list).
        self.ae_regs = list(ae_regs) if isinstance(ae_regs, list) else ae_regs
        self.user_node_regs = user_node_reg
        self.is_user_node = is_user_node

        self.eps = eps
        self.topK = top_K

        self.dropout_rate = drop_out_rate

        self.noise_pos = noise_pos
        self.noise_type = noise_type

        self.lr = lr

        self.org_loss_ratio = org_loss_ratio
        self.noise_loss_ratio = noise_loss_ratio
        self.noise_loss_ratio_W1 = noise_loss_ratio_W1

        self.robust_test = robust_test
        self.adv_training = adv_training

        self.save_T = save_T

        self.epochs = epochs
        self.batch_size = batch_size
        self.skip_step = T
        self.verbose = verbose

        self.is_prec = is_prec

        # Training records
        self.metric1, self.metric2 = [], []

        # Option checks; the exception type is unchanged, a message is added.
        if self.noise_pos == 'USER':
            assert self.is_user_node, \
                "noise_pos='USER' requires is_user_node=True"

        assert not (self.robust_test and self.adv_training), \
            "robust_test and adv_training cannot both be enabled"

        gtl.print_paras(inspect.currentframe())
예제 #13
0
    # original_matrix \
    #     = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::')

    # train_matrix, test_matrix \
    #     = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.2, random_state=10)

    # train_matrix, test_matrix \
    #     = mtl.matrix_split(original_matrix, opt='ranking', mode='mat', n_item_per_user=1, random_state=10)

    # original_matrix = mtl.matrix_to_binary(original_matrix, 0)
    # train_matrix = mtl.matrix_to_binary(train_matrix, 0)
    # test_matrix = mtl.matrix_to_binary(test_matrix, 0)

    # gtl.matrix_to_mat('Data/ML1M_Rank_200_1_Data.mat', opt='coo', original=original_matrix, train=train_matrix, test=test_matrix)

    data = gtl.load_mat_as_matrix('Data/ML1M_Rank_200_1_Data.mat', opt='coo')
    original_matrix, train_matrix, test_matrix = data['original'], data[
        'train'], data['test']

    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=8,
                                          inter_op_parallelism_threads=8,
                                          gpu_options=gpu_options)) as sess:
        model = GAN_AE(sess,
                       top_K=topK,
                       neg_ratio=neg_ratio,
                       num_factors=num_factors,
                       ae_regs=ae_regs,
                       eps=40,
                       lr=lr,