Example #1
0
 def prepare_data(self, original_matrix, train_matrix, test_matrix):
     """Store dataset sizes, training index lists and evaluation dicts on ``self``.

     Args:
         original_matrix: Full interaction matrix; its ``shape`` supplies the
             user and item counts and it is forwarded to ``mtl.negdict_mat``.
         train_matrix: Training split, flattened by ``mtl.matrix_to_list``.
         test_matrix: Test split, forwarded to ``mtl.negdict_mat``.
     """
     n_users, n_items = original_matrix.shape
     self.num_user, self.num_item = n_users, n_items

     # The third list returned by matrix_to_list is discarded here.
     uid_list, iid_list, _ = mtl.matrix_to_list(train_matrix)
     self.train_uid, self.train_iid = uid_list, iid_list

     # An alternative setup passes num_neg=self.num_ranking_neg instead of
     # the mod/random_state pair used below.
     eval_dicts = mtl.negdict_mat(
         original_matrix, test_matrix, mod='precision', random_state=20)
     self.neg_dict, self.ranking_dict, self.test_dict = eval_dicts

     # int() truncates, so a trailing partial batch is not counted.
     sample_count = len(uid_list)
     self.num_training = sample_count
     self.num_batch = int(sample_count / self.batch_size)
     print("Data Preparation Completed.")
Example #2
0
 def prepare_data(self, original_matrix, train_matrix, test_matrix):
     """Flatten the train/test matrices into lists and cache summary values.

     Args:
         original_matrix: Full interaction matrix; only its ``shape`` is read.
         train_matrix: Training split, flattened by ``mtl.matrix_to_list``.
         test_matrix: Test split, flattened by ``mtl.matrix_to_list``.
     """
     n_users, n_items = original_matrix.shape
     self.num_user, self.num_item = n_users, n_items

     tr_uid, tr_iid, tr_ratings = mtl.matrix_to_list(train_matrix)
     self.train_uid, self.train_iid, self.train_ratings = tr_uid, tr_iid, tr_ratings

     te_uid, te_iid, te_ratings = mtl.matrix_to_list(test_matrix)
     self.test_uid, self.test_iid, self.test_ratings = te_uid, te_iid, te_ratings

     # Mean of the training ratings.
     self.mu = np.mean(tr_ratings)

     # int() truncates, so a trailing partial batch is not counted.
     rating_count = len(tr_ratings)
     self.num_training = rating_count
     self.num_batch = int(rating_count / self.batch_size)
     print("Data Preparation Completed.")
Example #3
0
    def prepare_data(self, original_matrix, train_matrix, test_matrix):
        """Densify the training matrix and build ranking/test dicts and a negative mask.

        Args:
            original_matrix: Full interaction matrix; its ``shape`` supplies the
                user and item counts.
            train_matrix: Training split; converted with ``toarray()``.
            test_matrix: Test split, forwarded to ``mtl.negdict_mat``.
        """
        n_users, n_items = original_matrix.shape
        self.num_user, self.num_item = n_users, n_items

        dense_train = train_matrix.toarray()
        self.train_array = dense_train

        # The first value returned by negdict_mat is not used by this method.
        eval_dicts = mtl.negdict_mat(
            original_matrix,
            test_matrix,
            num_neg=None,
            neg_ratio=self.ranking_list_ratio,
        )
        _, self.ranking_dict, self.test_dict = eval_dicts

        self.negative_output_mask = mtl.neg_mask_array(
            original_matrix, train_matrix, neg_ratio=self.neg_ratio)

        # One training sample per row of the dense array; int() truncates,
        # so a trailing partial batch is not counted.
        row_count = dense_train.shape[0]
        self.num_training = row_count
        self.num_batch = int(row_count / self.batch_size)
        print("Data Preparation Completed.")
Example #4
0
    def prepare_data(self, original_matrix, train_matrix, test_matrix):
        """Densify the training matrix and build the ranking/test dictionaries.

        The arguments passed to ``mtl.negdict_mat`` depend on ``self.is_prec``.

        Args:
            original_matrix: Full interaction matrix; its ``shape`` supplies the
                user and item counts.
            train_matrix: Training split; converted with ``toarray()``.
            test_matrix: Test split, forwarded to ``mtl.negdict_mat``.
        """
        n_users, n_items = original_matrix.shape
        self.num_user, self.num_item = n_users, n_items

        dense_train = train_matrix.toarray()
        self.train_array = dense_train

        # Choose the keyword arguments once, then make a single call.
        if self.is_prec:
            negdict_kwargs = {'mod': 'precision', 'random_state': 20}
        else:
            negdict_kwargs = {'num_neg': 199, 'mod': 'others', 'random_state': 0}

        # The first value returned by negdict_mat is not used by this method.
        _, self.ranking_dict, self.test_dict = mtl.negdict_mat(
            original_matrix, test_matrix, **negdict_kwargs)

        # One training sample per row of the dense array; int() truncates,
        # so a trailing partial batch is not counted.
        row_count = dense_train.shape[0]
        self.num_training = row_count
        self.num_batch = int(row_count / self.batch_size)
        print("Data Preparation Completed.")
Example #5
0
    def prepare_data(self, original_matrix, train_matrix, test_matrix):
        """Build training lists, add sampled negatives, and cache batch counts.

        Args:
            original_matrix: Full interaction matrix; its ``shape`` supplies the
                user and item counts.
            train_matrix: Training split, flattened by ``mtl.matrix_to_list``.
            test_matrix: Test split, forwarded to ``mtl.negdict_mat``.
        """
        n_users, n_items = original_matrix.shape
        self.num_user, self.num_item = n_users, n_items

        uid_list, iid_list, label_list = mtl.matrix_to_list(train_matrix)
        self.train_uid, self.train_iid, self.train_labels = uid_list, iid_list, label_list

        eval_dicts = mtl.negdict_mat(
            original_matrix, test_matrix, num_neg=self.num_ranking_neg)
        self.neg_dict, self.ranking_dict, self.test_dict = eval_dicts

        # Negative sampling on the flattened lists; sampled entries receive
        # the value passed as neg_val (0).
        print("Enter NegSa")
        start_time = time.time()
        sampled = mtl.negative_sample_list(
            user_list=uid_list,
            item_list=iid_list,
            rating_list=label_list,
            num_neg=self.num_neg,
            neg_val=0,
            neg_dict=self.neg_dict,
        )
        self.train_uid, self.train_iid, self.train_labels = sampled
        print("Leaving NegSa")
        elapsed = time.time() - start_time
        print("Negative Sampling Time: {0}".format(elapsed))

        # Counts are taken after sampling, so they reflect the lists returned
        # by negative_sample_list. int() truncates, so a trailing partial batch
        # is not counted.
        label_count = len(self.train_labels)
        self.num_training = label_count
        self.num_batch = int(label_count / self.batch_size)

        print("Data Preparation Completed.")
Example #6
0
    def prepare_data(self, original_matrix, train_matrix, test_matrix):
        """Densify the training matrix and build the ranking/test dictionaries.

        Args:
            original_matrix: Full interaction matrix; its ``shape`` supplies the
                user and item counts.
            train_matrix: Training split; converted with ``toarray()``.
            test_matrix: Test split, forwarded to ``mtl.negdict_mat``.
        """
        n_users, n_items = original_matrix.shape
        self.num_user, self.num_item = n_users, n_items

        dense_train = train_matrix.toarray()
        self.train_array = dense_train

        # The first value returned by negdict_mat is not used by this method.
        eval_dicts = mtl.negdict_mat(
            original_matrix, test_matrix, mod='precision', random_state=20)
        _, self.ranking_dict, self.test_dict = eval_dicts

        # A negative output mask (mtl.neg_mask_array with
        # num_neg=self.output_neg_num) is intentionally not built here.

        # One training sample per row of the dense array; int() truncates,
        # so a trailing partial batch is not counted.
        row_count = dense_train.shape[0]
        self.num_training = row_count
        self.num_batch = int(row_count / self.batch_size)
        print("Data Preparation Completed.")
Example #7
0
    def prepare_data(self, original_matrix, train_matrix, test_matrix):
        """Build ranking dicts, weighted training lists and CSR/CSC training views.

        Args:
            original_matrix: Full interaction matrix; its ``shape`` supplies the
                user and item counts.
            train_matrix: Training split; also converted to CSR and CSC form.
            test_matrix: Test split, forwarded to ``mtl.negdict_mat`` and
                ``mtl.get_full_matrix``.
        """
        n_users, n_items = original_matrix.shape
        self.num_user, self.num_item = n_users, n_items

        # Dictionaries used for ranking evaluation; the first returned value
        # is not used by this method.
        eval_dicts = mtl.negdict_mat(
            original_matrix, test_matrix, num_neg=self.num_ranking_neg)
        _, self.ranking_dict, self.test_dict = eval_dicts

        # Flattened lists holding all explicit and implicit training ratings.
        uid_list, iid_list, rating_list = mtl.get_full_matrix(
            train_matrix, test_matrix)
        self.train_uid, self.train_iid, self.train_ratings = uid_list, iid_list, rating_list

        # Cui (confidence weight, 1 + alpha * rating) and Pui (binary
        # indicator) terms of the loss function.
        alpha = self.alpha
        self.weights_ui = [1 + alpha * rating for rating in rating_list]
        self.indicators_ui = mtl.list_to_binary(rating_list, 0)

        # Row- and column-oriented copies used by the ALS algorithm.
        csr_view = train_matrix.tocsr()
        csc_view = train_matrix.tocsc()
        self.train_matrix_csr, self.train_matrix_csc = csr_view, csc_view

        # int() truncates, so a trailing partial batch is not counted.
        rating_count = len(rating_list)
        self.num_training = rating_count
        self.num_batch = int(rating_count / self.batch_size)
        print("Data Preparation Completed.")
Example #8
0
    def prepare_data(self, original_matrix1, train_matrix1, test_matrix1,
                     original_matrix2, train_matrix2, test_matrix2):
        """Prepare equal-length training lists and evaluation dicts for two domains.

        Steps: record matrix sizes, build the negative/test dictionaries,
        flatten both training matrices, upsample the shorter domain so both
        have the same number of samples, then run negative sampling for both
        domains in a two-process pool.

        Args:
            original_matrix1: Full interaction matrix of domain 1.
            train_matrix1: Training split of domain 1; its ``shape`` supplies
                ``num_user1`` / ``num_item1``.
            test_matrix1: Test split of domain 1.
            original_matrix2: Full interaction matrix of domain 2.
            train_matrix2: Training split of domain 2; its ``shape`` supplies
                ``num_user2`` / ``num_item2``.
            test_matrix2: Test split of domain 2.

        Raises:
            AssertionError: If the two domains' label lists differ in length
                after upsampling or after negative sampling.
        """
        # Meta Info
        self.num_user1, self.num_item1 = train_matrix1.shape
        self.num_user2, self.num_item2 = train_matrix2.shape

        # NOTE(review): this call site expects a 2-tuple from
        # mtl.negdict_mat -- confirm against the helper's return signature.
        self.neg_dict1, self.test_dict1 = mtl.negdict_mat(
            original_matrix1, test_matrix1, num_neg=self.num_ranking_list - 1)
        self.neg_dict2, self.test_dict2 = mtl.negdict_mat(
            original_matrix2, test_matrix2, num_neg=self.num_ranking_list - 1)

        self.train_uid1, self.train_iid1, self.train_labels1 = mtl.matrix_to_list(
            train_matrix1)
        self.train_uid2, self.train_iid2, self.train_labels2 = mtl.matrix_to_list(
            train_matrix2)

        # Extend the shorter training data so both domains are the same length.
        length1, length2 = len(self.train_labels1), len(self.train_labels2)
        if length1 < length2:
            self.train_uid1, self.train_iid1, self.train_labels1 = \
                mtl.data_upsample_list(self.train_uid1, self.train_iid1,
                                       self.train_labels1,
                                       num_ext=length2 - length1)
        elif length2 < length1:
            self.train_uid2, self.train_iid2, self.train_labels2 = \
                mtl.data_upsample_list(self.train_uid2, self.train_iid2,
                                       self.train_labels2,
                                       num_ext=length1 - length2)

        assert len(self.train_labels1) == len(self.train_labels2)

        # Negative Sampling on Lists
        print("Enter NegSa")
        start_time = time.time()

        # One job per domain. gtl.mphelper presumably calls the first tuple
        # element with the remaining elements as arguments -- TODO confirm.
        # Serial equivalent: mtl.negative_sample_list(neg_dict, uid, iid,
        # labels, num_neg=self.num_neg, neg_val=0) once per domain.
        sampling_jobs = [
            (mtl.negative_sample_list, self.neg_dict1, self.train_uid1,
             self.train_iid1, self.train_labels1, self.num_neg, 0),
            (mtl.negative_sample_list, self.neg_dict2, self.train_uid2,
             self.train_iid2, self.train_labels2, self.num_neg, 0),
        ]

        # Always shut the pool down so the worker processes do not leak,
        # even if one of the sampling jobs raises.
        pool = mp.Pool(processes=2)
        try:
            results = pool.map(gtl.mphelper, sampling_jobs)
        finally:
            pool.close()
            pool.join()

        self.train_uid1, self.train_iid1, self.train_labels1 = results[0]
        self.train_uid2, self.train_iid2, self.train_labels2 = results[1]

        print("Leaving NegSa")
        print("Negative Sampling Time: {0}".format(time.time() - start_time))

        assert len(self.train_labels1) == len(self.train_labels2)
        self.num_training = len(self.train_labels1)
        # int() truncates, so a trailing partial batch is not counted.
        self.num_batch = int(self.num_training / self.batch_size)

        print("Data Preparation Completed.")
Example #9
0
if __name__ == "__main__":

    args = parseArgs()
    num_epochs, batch_size, \
    reg_embs, num_neg, lr, ndcgk, num_factors, num_ranking_list, num_test = \
    args.epochs, args.batch_size,\
    args.ebregs, args.num_neg, args.lr, args.ndcgk, args.nfactors, args.num_rk, args.ntest

    reg_embs = list(np.float32(eval(reg_embs)))

    # original_matrix, train_matrix, test_matrix, num_users, num_items \
    #     = mtl.load_as_matrix(datafile='Data/books_and_elecs_merged.csv')

    original_matrix \
        = mtl.load_original_matrix(datafile='Data/ml-100k/u.data',header=['uid','iid','ratings','time'],sep='\t')
    original_matrix = mtl.matrix_to_binary(original_matrix, 0)
    train_matrix, test_matrix = mtl.matrix_split(original_matrix,
                                                 opt='ranking',
                                                 n_item_per_user=num_test)

    num_users, num_items = original_matrix.shape
    print("Number of users is {0}".format(num_users))
    print("Number of items is {0}".format(num_items))
    print("Number of ratings for all is {0}".format(original_matrix.nnz))
    print("Number of ratings for training is {0}".format(train_matrix.nnz))
    print("Ratings density for training is {0}".format(
        train_matrix.nnz / (num_users * num_items)))

    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
Example #10
0
        default='adam',
        choices=('adam', 'adagrad', 'rmsprop', 'sgd'),
        help='Specify an optimizer: adagrad, adam, rmsprop, sgd')
    return parser.parse_args()


if __name__ == "__main__":

    args = parseArgs()
    num_epochs, batch_size, regs, num_neg, alpha, lr, ndcgk, num_factors, num_ranking_list = \
        args.epochs, args.batch_size, args.ebregs,args.num_neg,args.alpha,args.lr,args.ndcgk, args.nfactors, args.num_rk

    regs = list(np.float32(eval(regs)))

    original_matrix1, train_matrix1, test_matrix1, num_users1, num_items1\
        = mtl.load_as_matrix(datafile='Data/books_small/original.csv')

    original_matrix2, train_matrix2, test_matrix2, num_users2, num_items2\
        = mtl.load_as_matrix(datafile='Data/elec_small/original.csv')

    print("Number of users in domain 1 is {0}".format(num_users1))
    print("Number of items in domain 1 is {0}".format(num_items1))
    print("Number of ratings in domain 1 in all is {0}".format(
        original_matrix1.nnz))
    print("Number of ratings in domain 1 for training is {0}".format(
        train_matrix1.nnz))
    print("Ratings density of domain 1 for training is {0}".format(
        train_matrix1.nnz / (num_users1 * num_items1)))

    print("Number of users in domain 2 is {0}".format(num_users2))
    print("Number of items in domain 2 is {0}".format(num_items2))
Example #11
0
# Script fragment: scores each user's ranking candidates by per-item counts
# taken from the transposed training matrix, and accumulates HR / NDCG totals.
num_user, num_item = original_matrix.shape[0], original_matrix.shape[1]

# Cut-off values passed to the ranking metrics; one accumulator slot per cut-off.
topK = [5, 10]
total_hr, total_ndcg = np.zeros(len(topK)), np.zeros(len(topK))

# Transpose so that iterating over .data walks items instead of users.
# NOTE(review): assumes train_matrix uses a row-list sparse format (e.g. scipy
# LIL) so that each element of .data is one row's list of values -- confirm.
tr_mat = train_matrix.transpose()
item_pop_dict = {}
item_pop_list = []
for item, ratings in enumerate(tr_mat.data):
    # Number of stored entries for this item, kept in both dict and list form.
    item_pop_dict[item] = len(ratings)
    item_pop_list.append(len(ratings))
item_pop_arr = np.asarray(item_pop_list)

# Only the ranking and test dictionaries are used; the first value is discarded.
_, ranking_dict, test_dict = mtl.negdict_mat(original_matrix,
                                             test_matrix,
                                             num_neg=199,
                                             mod='others',
                                             random_state=10)

for user in ranking_dict:

    # Skip users that have no test entries.
    if len(test_dict[user]) == 0:
        continue

    iid = ranking_dict[user]  # The ranking item ids for this user
    # Use each candidate item's count as its ranking score.
    rk = item_pop_arr[np.asarray(iid)]
    print(rk)
    hr, ndcg = evl.rankingMetrics(rk, iid, topK, test_dict[user], mod='hr')
    total_hr += hr
    total_ndcg += ndcg
Example #12
0
if __name__ == "__main__":

    args = parseArgs()
    num_epochs, batch_size, \
    regs, lr, ndcgk, num_factors, num_ranking_list, num_test = \
    args.epochs, args.batch_size,\
    args.regs, args.lr, args.ndcgk, args.nfactors, args.num_rk, args.ntest

    regs_ui = list(np.float32(eval(regs)))

    # original_matrix, train_matrix, test_matrix, num_users, num_items \
    #     = mtl.load_as_matrix(datafile='Data/books_and_elecs_merged.csv')

    original_matrix \
        = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::')

    # original_matrix = mtl.matrix_theshold(original_matrix,threshold=2)

    original_matrix = mtl.matrix_to_binary(original_matrix, 0)

    # train_matrix, test_matrix = mtl.matrix_split(original_matrix,n_item_per_user=num_test)
    train_matrix, test_matrix = mtl.matrix_split(original_matrix,
                                                 opt='prediction',
                                                 mode='user',
                                                 test_size=0.2,
                                                 random_state=10)

    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          intra_op_parallelism_threads=8,