Example #1
def train(config):
    # set seed
    set_seed(config.seed, cuda=config.use_cuda)

    # load dataset
    train = Interactions(config.train_root)
    # transform triplets to sequence representation
    train.to_sequence(config.L, config.T)

    test = Interactions(config.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)

    # print(config)
    # print(model_config)
    # fit model
    model = Recommender(n_iter=config.n_iter,
                        batch_size=config.batch_size,
                        learning_rate=config.learning_rate,
                        l2=config.l2,
                        neg_samples=config.neg_samples,
                        model_args=model_config,
                        use_cuda=config.use_cuda)

    return model.fit(train, test, verbose=True)
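Neither `set_seed` nor `model_config` is defined in this fragment; `model_config` presumably comes from a second argument parser (compare Example #6). A minimal sketch of what `set_seed` likely does, assuming a PyTorch backend — the exact body is an assumption:

import random

import numpy as np
import torch


def set_seed(seed, cuda=False):
    # seed every RNG the training loop touches
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)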
Example #2
def main():
    parser = argparse.ArgumentParser()
    # data arguments
    parser.add_argument('--train_root',
                        type=str,
                        default='datasets/ml1m/train.csv')
    parser.add_argument('--val_root',
                        type=str,
                        default='datasets/ml1m/val.csv')
    parser.add_argument('--test_root',
                        type=str,
                        default='datasets/ml1m/test.csv')
    parser.add_argument('--L', type=int, default=20)
    parser.add_argument('--T', type=int, default=1)  # next_T
    parser.add_argument('--top_k', type=int, default=20)
    # train arguments
    parser.add_argument('--n_iter', type=int, default=64)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--l2', type=float, default=1e-6)
    parser.add_argument('--neg_samples', type=int, default=1)  # same as T
    parser.add_argument('--decay_rate', default=0.99, type=float)
    # model arguments
    parser.add_argument('--d', type=int, default=50)  # embedding dims
    parser.add_argument('--nv', type=int, default=4)  # number of filters for vertical convolution
    parser.add_argument('--nh', type=int, default=16)  # number of filters for horizontal convolution
    parser.add_argument('--drop', type=float, default=0.5)
    parser.add_argument('--check_dir', type=str, default='save/')
    config = parser.parse_args()

    # set seed
    set_seed(config.seed)

    # load dataset
    train = Interactions(config.train_root)
    # transform triplets to sequence representation
    train.to_sequence(config.L, config.T)

    val = Interactions(config.val_root)
    val.to_sequence(config.L, config.T)

    print(config)
    # fit model
    model = Recommender(args=config)
    model.fit(train, val, verbose=True)
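All of these examples call `Interactions.to_sequence(L, T)` before fitting. The transformation itself is not shown; the sketch below illustrates the sliding-window idea behind it (L past items as input, the next T items as targets) with assumed names, not the library's actual implementation:

import numpy as np


def sliding_windows(item_ids, L, T):
    # one user's chronological items -> (inputs of length L, next-T targets)
    sequences, targets = [], []
    for start in range(len(item_ids) - L - T + 1):
        sequences.append(item_ids[start:start + L])
        targets.append(item_ids[start + L:start + L + T])
    return np.array(sequences), np.array(targets)


# sliding_windows([1, 2, 3, 4, 5, 6, 7], L=5, T=1)
# -> sequences [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]], targets [[6], [7]]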
Example #3
def test(args):
    data = Interactions(args.test_root)
    data.to_sequence(args.L, args.T)
    sequences_np = data.sequences.sequences
    targets_np = data.sequences.targets
    users_np = data.sequences.user_ids.reshape(-1, 1)
    n_test = sequences_np.shape[0]
    print('total test instances: %d' % n_test)
    num_users = data.num_users
    num_items = data.num_items
    NDCG, HR, MRR = 0.0, 0.0, 0.0
    # one row of all candidate item ids per batch element
    item_ids = np.tile(np.arange(num_items), (args.batch_size, 1))
    test_batches = n_test // args.batch_size

    model = Caser(num_users, num_items, args)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.check_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Restore model from {} successfully!'.format(args.check_dir))
        else:
            print('Restore model from {} failed!'.format(args.check_dir))
            return
        for i in range(test_batches):
            sequences = sequences_np[i * args.batch_size: (i + 1) * args.batch_size]
            targets = targets_np[i * args.batch_size: (i + 1) * args.batch_size]
            users = users_np[i * args.batch_size: (i + 1) * args.batch_size]
            _, top_k_index = model.predict(sess, sequences, users, item_ids)
            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for b in range(args.batch_size):  # avoid shadowing the batch index i
                ranked = top_k_index[b]
                for j in range(args.top_k):
                    if targets[b][0] == ranked[j]:
                        hr += 1
                        mrr += 1 / (1 + j)
                        dcg = 1 / np.log2(1 + 1 + j)
                        idcg = 1 / np.log2(1 + 1)
                        ndcg += dcg / idcg
                        break
            HR += hr / args.batch_size
            NDCG += ndcg / args.batch_size
            MRR += mrr / args.batch_size
    return HR / test_batches, NDCG / test_batches, MRR / test_batches
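The per-batch metric loop above condenses to this standalone helper (hit ratio, MRR, and NDCG for one ranked list with a single relevant item; names are illustrative):

import numpy as np


def rank_metrics(ranked, target):
    # HR / MRR / NDCG for a single relevant item in a top-k ranking
    for j, item in enumerate(ranked):
        if item == target:
            mrr = 1.0 / (1 + j)
            ndcg = (1.0 / np.log2(2 + j)) / (1.0 / np.log2(2))  # dcg / idcg
            return 1.0, mrr, ndcg
    return 0.0, 0.0, 0.0


# rank_metrics([7, 3, 9], 3) -> (1.0, 0.5, 0.6309...)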
Example #4
def main():
    parser = argparse.ArgumentParser()
    # data arguments
    parser.add_argument('--train_root', type=str, default='datasets/ml1m/test/train.txt')
    parser.add_argument('--test_root', type=str, default='datasets/ml1m/test/test.txt')
    parser.add_argument('--L', type=int, default=5)
    parser.add_argument('--T', type=int, default=3)
    # train arguments
    parser.add_argument('--n_iter', type=int, default=50)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--l2', type=float, default=1e-6)
    parser.add_argument('--neg_samples', type=int, default=3)
    # model arguments
    parser.add_argument('--d', type=int, default=50)
    parser.add_argument('--nv', type=int, default=4)
    parser.add_argument('--nh', type=int, default=16)
    parser.add_argument('--drop', type=float, default=0.5)
    
    config = parser.parse_args()

    # set seed
    set_seed(config.seed)

    # load dataset
    train = Interactions(config.train_root)
    # transform triplets to sequence representation
    train.to_sequence(config.L, config.T)

    test = Interactions(config.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)

    print(config)
    # fit model
    model = Recommender(args=config)
    model.fit(train, test, verbose=True)
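Note that `test` reuses `train.user_map` and `train.item_map`: both splits must share one id space, or embeddings learned on train would be meaningless at test time. A sketch of the mapping idea (assumed, not the library's code; in Caser-style code ids usually start at 1 so 0 can serve as padding):

def build_id_map(raw_ids, start=1):
    # assign ids in first-seen order; 0 is conventionally reserved for padding
    mapping = {}
    for raw in raw_ids:
        if raw not in mapping:
            mapping[raw] = start + len(mapping)
    return mapping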
Example #5
    def run(self):
        # build location -> category maps; categories get separate indices
        # for the entity side and the relation side of the KG
        print('/1')
        index, r_index = 0, 0
        dict_locid_catid, dict_catid_to_index, dict_r_catid_to_index = {}, {}, {}
        for ny_vcat_i in self.ny_vcat[1:]:
            ny_vcat_i = ny_vcat_i.split(',')
            locid_ = ny_vcat_i[0]
            catid_ = ny_vcat_i[1]
            if locid_ not in dict_locid_catid:
                dict_locid_catid[locid_] = (catid_, int(catid_))
                if catid_ not in dict_catid_to_index:
                    dict_catid_to_index[catid_] = index
                    index += 1
                if int(catid_) not in dict_r_catid_to_index:
                    dict_r_catid_to_index[int(catid_)] = r_index
                    r_index += 1
        set_locid_name = set(dict_locid_catid.keys())
        locid_name = list(set(dict_locid_catid.keys()))
        catid_set = list(set(dict_catid_to_index.values()))
        relation_catid_set = list(set(dict_r_catid_to_index.values()))
        print('/2')
        # replace each location's raw category ids with their indices
        for i in range(len(locid_name)):
            locid_catid_ = dict_locid_catid[locid_name[i]]
            index_ = dict_catid_to_index[locid_catid_[0]]
            r_index_ = dict_r_catid_to_index[locid_catid_[1]]
            dict_locid_catid[locid_name[i]] = (index_, r_index_)
        print('/3')
        # parse user demographics: gender and race define a user category
        dict_uid_category, uid_category_set, gender_set, race_set = {}, [], [], []
        for ny_demo_i in self.ny_demo[1:]:
            ny_demo_i = ny_demo_i.split(',')
            uid_ = ny_demo_i[0]
            gender_ = int(ny_demo_i[1]) - 1
            race_ = int(ny_demo_i[3]) - 1
            category_ = gender_ * race_ + race_
            dict_uid_category[uid_] = (category_, (gender_, race_))
            uid_category_set.append(category_)
            gender_set.append(gender_)
            race_set.append(race_)
        uid_category_set = list(set(uid_category_set))
        uid_name = set(dict_uid_category.keys())
        self.gender_set = list(set(gender_set))
        self.race_set = list(set(race_set))
        print('/4')
        # collect (day, time, location) check-ins per user, restricted by year
        dict_uid_time_locid = {}
        for i in range(len(self.ny_checkin) - 1):
            ny_checkin_i = self.ny_checkin[i + 1].split(',')
            uid_, time_, locid_ = (ny_checkin_i[1], ny_checkin_i[2],
                                   ny_checkin_i[5].strip('\n'))
            time_day, time_hms = time_.split()[0], time_.split()[1]
            if uid_ in uid_name and locid_ in set_locid_name:
                year_ = int(time_day.split('-')[0])
                if year_ in self.dict_restrict['year']:
                    if uid_ not in dict_uid_time_locid:
                        dict_uid_time_locid[uid_] = [(time_day, time_hms,
                                                      locid_)]
                    else:
                        dict_uid_time_locid[uid_].append(
                            (time_day, time_hms, locid_))
        print('/5')
        # keep users with enough check-ins; turn each check-in into a
        # (location, time-relation) pair and index locations as entities
        uid_name = list(set(dict_uid_time_locid.keys()))
        dict_uid_locid_relation, dict_locid_to_entity, index = {}, {}, 0
        for u_i in range(len(uid_name)):
            uid_time_locid_ = dict_uid_time_locid[uid_name[u_i]]
            if len(uid_time_locid_) >= self.dict_restrict['num_checkin']:
                sorted_uid_time_locid_ = sorted(uid_time_locid_)
                for i in range(len(sorted_uid_time_locid_)):
                    time_day_, time_hms_, locid_ = sorted_uid_time_locid_[i]
                    daynum_time_day_ = int(time_day_.split('-')[1]) - 1
                    time_h = int(time_hms_.split('-')[0].split(':')[0])
                    if time_h > 11:
                        time_h = 1
                    else:
                        time_h = 0
                    relation_time = time_h * daynum_time_day_ + daynum_time_day_
                    sorted_uid_time_locid_[i] = (locid_, relation_time)
                    if locid_ not in dict_locid_to_entity:
                        dict_locid_to_entity[locid_] = index
                        index += 1
                dict_uid_locid_relation[uid_name[u_i]] = sorted_uid_time_locid_
        uid_name = list(set(dict_uid_locid_relation.keys()))
        locid_name = list(set(dict_locid_to_entity.keys()))
        print('/6')
        # per-user chronological entity lists, split into train and test windows
        test_X_num = self.dict_restrict['test_X_num']
        test_Y_num = self.dict_restrict['test_Y_num']
        train_set, test_set, test_X_set, test_Y_set, relation_time_set, data_set = [], [], [], [], [], []
        for i in range(len(uid_name)):
            uid_locid_relation_ = dict_uid_locid_relation[uid_name[i]]
            data_set_, catid_set_ = [], []
            for j in range(len(uid_locid_relation_)):
                locid_ = uid_locid_relation_[j][0]
                catid_ = dict_locid_catid[locid_][0]
                entity_locid_ = dict_locid_to_entity[locid_]
                relation_time_ = uid_locid_relation_[j][1]
                data_set_.append(entity_locid_)
                relation_time_set.append(relation_time_)
                catid_set_.append(catid_)
            data_set.append(data_set_)
            split = len(data_set_) - test_X_num - test_Y_num
            train_set_ = data_set_[:split]
            test_X_set_ = data_set_[split:split + test_X_num]
            test_Y_set_ = data_set_[split + test_X_num:]
            test_set_ = data_set_[split:]
            train_set.append(train_set_)
            test_set.append(test_set_)
            test_X_set.append(test_X_set_)
            test_Y_set.append(test_Y_set_)
        self.test_X_set = np.array(test_X_set)
        relation_time_set = list(set(relation_time_set))
        entity_set = list(set(dict_locid_to_entity.values()))
        print('/7')
        # pack locations, user categories, and venue categories into one
        # contiguous entity id space; time and category relations likewise
        dict_uid_category_to_entity = {}
        for c in uid_category_set:
            dict_uid_category_to_entity[c] = c + len(entity_set)
        dict_catid_to_entity = {}
        for c in catid_set:
            dict_catid_to_entity[c] = len(entity_set) + len(uid_category_set) + c
        dict_relation_catid_to_r = {}
        for r in relation_catid_set:
            dict_relation_catid_to_r[r] = len(relation_time_set) + r
        print('/8')
        # assemble the KG: for every visited location, add
        # (location, time-relation, user-category) and
        # (location, category-relation, venue-category) triples,
        # bucketed by the visiting user's gender and race
        self.dict_KG, self.uid_attribute_category = {}, []
        for i in range(len(uid_name)):
            uid_category_, uid_category_detail_ = dict_uid_category[uid_name[i]]
            entity_uid_category_ = dict_uid_category_to_entity[uid_category_]
            uid_locid_relation_ = dict_uid_locid_relation[uid_name[i]]
            gender_, race_ = uid_category_detail_[0], uid_category_detail_[1]
            self.uid_attribute_category.append((gender_, race_))
            for j in range(len(uid_locid_relation_)):
                locid_ = uid_locid_relation_[j][0]
                relation_time_ = uid_locid_relation_[j][1]
                entity_locid_ = dict_locid_to_entity[locid_]
                catid_, relation_catid_ = dict_locid_catid[locid_]
                entity_catid_ = dict_catid_to_entity[catid_]
                relation_catid_ = dict_relation_catid_to_r[relation_catid_]
                if entity_locid_ not in self.dict_KG:
                    # first sight of this location: one empty bucket per
                    # gender value and per race value
                    self.dict_KG[entity_locid_] = {}
                    for g in self.gender_set:
                        self.dict_KG[entity_locid_]['gender-' + str(g)] = []
                    for r in self.race_set:
                        self.dict_KG[entity_locid_]['race-' + str(r)] = []
                # append both triples under the visiting user's buckets
                for key in ('gender-' + str(gender_), 'race-' + str(race_)):
                    self.dict_KG[entity_locid_][key].append(
                        [entity_locid_, relation_time_, entity_uid_category_])
                    self.dict_KG[entity_locid_][key].append(
                        [entity_locid_, relation_catid_, entity_catid_])

        self.dict_itemid_upfdf = dict()
        for i in range(len(uid_name)):
            uid_upf_ = self.uid_attribute_category[i]
            uid_data_set_ = data_set[i]
            for j in range(len(uid_data_set_)):
                if uid_data_set_[j] not in self.dict_itemid_upfdf:
                    self.dict_itemid_upfdf[uid_data_set_[j]] = list()
                self.dict_itemid_upfdf[uid_data_set_[j]].append(uid_upf_)
        itemid_name = list(set(self.dict_itemid_upfdf.keys()))
        for i in range(len(itemid_name)):
            itemid_upfdf_ = self.dict_itemid_upfdf[itemid_name[i]]
            b_0, b_1, b_2, g_0, g_1, g_2 = 0, 0, 0, 0, 0, 0
            for j in range(len(itemid_upfdf_)):
                if itemid_upfdf_[j] == (0, 0):
                    b_0 += 1
                elif itemid_upfdf_[j] == (0, 1):
                    b_1 += 1
                elif itemid_upfdf_[j] == (0, 2):
                    b_2 += 1
                elif itemid_upfdf_[j] == (1, 0):
                    g_0 += 1
                elif itemid_upfdf_[j] == (1, 1):
                    g_1 += 1
                elif itemid_upfdf_[j] == (1, 2):
                    g_2 += 1
                else:
                    print('Error!!')
            total = len(itemid_upfdf_)
            b_0_p, b_1_p, b_2_p = b_0 / total, b_1 / total, b_2 / total
            g_0_p, g_1_p, g_2_p = g_0 / total, g_1 / total, g_2 / total
            self.dict_itemid_upfdf[itemid_name[i]] = [
                b_0_p, b_1_p, b_2_p, g_0_p, g_1_p, g_2_p
            ]

        num_user = len(uid_name)
        num_item = len(entity_set)
        n_entities = len(entity_set) + len(uid_category_set) + len(catid_set)
        n_relation = len(relation_time_set) + len(relation_catid_set)

        train = Interactions(train_set, num_user, num_item)
        train.to_sequence(self.L_hgn, self.T_hgn)

        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids
        train_matrix = train.tocsr()

        param_ = [self.args, num_user, num_item, n_entities, n_relation]
        data_ = [users_np, sequences_np, targets_np, train_matrix]
        test_ = [
            train, self.test_X_set, test_Y_set, self.uid_attribute_category
        ]

        train_data, test_data = list(), list()
        entity_set = set(entity_set)
        for i in range(len(train_set)):
            uid_ = i
            neg_entity_set_ = list(entity_set - set(train_set[i]))
            neg_train_set_ = random.sample(neg_entity_set_, len(train_set[i]))
            for j in range(len(train_set[i])):
                train_data.append([uid_, train_set[i][j], 1])
                train_data.append([uid_, neg_train_set_[j], 0])
        for i in range(len(test_set)):
            uid_ = i
            neg_entity_set_ = list(entity_set - set(test_set[i]))
            neg_test_set_ = random.sample(neg_entity_set_, len(test_set[i]))
            for j in range(len(test_set[i])):
                test_data.append([uid_, test_set[i][j], 1])
                test_data.append([uid_, neg_test_set_[j], 0])
        train_data = np.array(train_data)
        eval_data = train_data
        test_data = np.array(test_data)

        user_history_dict = dict()
        for i in range(len(data_set)):
            uid_ = i
            user_history_dict[uid_] = data_set[i]

        pickle_data = {
            'train_data': train_data,
            'eval_data': eval_data,
            'test_data': test_data,
            'n_entity': n_entities,
            'n_relation': n_relation,
            'kg_np': self.KG_random_generator(),
            'user_history_dict': user_history_dict,
        }
        return param_, data_, test_
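The offset arithmetic in step /7 packs three vocabularies into one contiguous entity id space (and two into one relation space). Schematically, with hypothetical names:

def entity_layout(n_locations, n_user_categories, n_venue_categories):
    # id ranges implied by the offsets computed above
    return {
        'location': range(0, n_locations),
        'user_category': range(n_locations,
                               n_locations + n_user_categories),
        'venue_category': range(n_locations + n_user_categories,
                                n_locations + n_user_categories +
                                n_venue_categories),
    }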
Example #6
    model_parser.add_argument('--nv', type=int, default=2)
    model_parser.add_argument('--nh', type=int, default=16)
    model_parser.add_argument('--drop', type=float, default=0.5)
    model_parser.add_argument('--ac_conv', type=str, default='iden')
    model_parser.add_argument('--ac_fc', type=str, default='sigm')

    model_config = model_parser.parse_args()
    model_config.L = config.L

    # set seed
    set_seed(config.seed, cuda=config.use_cuda)

    # load dataset
    train = Interactions(config.train_root)
    # transform triplets to sequence representation
    train.to_sequence(config.L)

    test = Interactions(config.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)

    print(config)
    print(model_config)
    # fit model
    model = Recommender(n_iter=config.n_iter,
                        batch_size=config.batch_size,
                        learning_rate=config.learning_rate,
                        l2=config.l2,
                        neg_samples=config.neg_samples,
                        use_cuda=config.use_cuda,
                        checkpoint=config.checkpoint,
                        model_args=model_config)

    model.fit(train, test, verbose=True)
Example #7
    parser.add_argument('--nh', type=int, default=16)
    parser.add_argument('--drop', type=float, default=0.5)
    parser.add_argument('--ac_conv', type=str, default='relu')
    parser.add_argument('--ac_fc', type=str, default='relu')
    config = parser.parse_args()
    # model_config = model_parser.parse_args()
    # model_config.L = config.L

    # set seed
    set_seed(config.seed,
             cuda=config.use_cuda)

    # load dataset
    train = Interactions(config.train_root)
    # transform triplets to sequence representation
    train.to_sequence(config.L, config.T)

    test = Interactions(config.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)

    print(config)
    #print(model_config)
    # fit model
    model = Recommender(n_iter=config.n_iter,
                        batch_size=config.batch_size,
                        learning_rate=config.learning_rate,
                        l2=config.l2,
                        neg_samples=config.neg_samples,
                        model_args=config,
                        use_cuda=config.use_cuda)
Example #8
    # argparse's type=list splits the string into characters; take
    # space-separated ints instead
    parser.add_argument('--block_dim', type=int, nargs='+', default=[128, 256])
    parser.add_argument('--drop',
                        type=float,
                        default=0.5,
                        help='drop out ratio.')
    parser.add_argument('--fc_dim', type=int, default=150)
    parser.add_argument('--ac_fc',
                        type=str,
                        default='tanh',
                        choices=['relu', 'tanh', 'sigm'])

    args = parser.parse_args()

    # set seed
    set_seed(args.seed, cuda=args.use_cuda)
    # load dataset
    train = Interactions(args.data_root + args.dataset + args.train_dir)
    # transform triplets to sequence representation
    train.to_sequence(args.L, args.T)

    test = Interactions(args.data_root + args.dataset + args.test_dir,
                        user_map=train.user_map,
                        item_map=train.item_map)

    print(args)
    print('Using dataset: {}'.format(args.dataset))
    # fit model
    model = Recommender(args)

    model.fit(train, test, verbose=True)
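The dataset path here is assembled by raw string concatenation, so `args.data_root` and `args.dataset` must carry their own trailing separators. `os.path.join` is the more robust idiom (a sketch with the same argument names):

import os

train_path = os.path.join(args.data_root, args.dataset, args.train_dir)
test_path = os.path.join(args.data_root, args.dataset, args.test_dir)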
Example #9
def build_data(args):
    # load dataset
    train = Interactions(args.train_root)
    # transform triplets to sequence representation
    train.to_sequence(args.L, args.T)

    test = Interactions(args.test_root,
                        user_map=train.user_map,
                        item_map=train.item_map)

    if not os.path.exists('./topic_data'):
        os.mkdir('./topic_data')
    topic_path = f'./topic_data/data_{args.train_data}_{args.L}_{args.topic_num}.pkl'
    if args.train_lda or not os.path.exists(topic_path):
        matrix = dok_matrix((train.num_users, train.num_items - 1))
        user_items = defaultdict(list)
        with open(args.train_root) as f:
            for line in f:
                line = line[:-1]
                if line == '':
                    break
                user, item, _ = line.split()
                matrix[train.user_map[user], train.item_map[item] - 1] += 1
                user_items[train.user_map[user]].append(
                    f'i_{train.item_map[item]}')

        lda = LatentDirichletAllocation(n_components=args.topic_num,
                                        perp_tol=0.01,
                                        max_iter=200,
                                        max_doc_update_iter=500,
                                        n_jobs=args.n_jobs,
                                        random_state=args.seed,
                                        verbose=1,
                                        evaluate_every=10)
        lda.fit(matrix)
        # normalize each topic's weights into an item distribution
        item_probs = (lda.components_ /
                      lda.components_.sum(axis=1)[:, np.newaxis])
        print("LDA training process finished!")

        ### calculate train_probs
        ### this step can use a lot of memory, so we process the sequences in batches
        n_sequences = train.sequences.sequences.shape[0]
        train_probs = np.zeros((n_sequences, args.topic_num))

        n_batches = ceil(n_sequences / args.lda_batch_size)
        for n in range(n_batches):
            start = n * args.lda_batch_size
            sub_sequences = train.sequences.sequences[start:start +
                                                      args.lda_batch_size]
            matrix = np.zeros((*sub_sequences.shape, train.num_items - 1),
                              dtype=np.float32)

            # scatter a one-hot count per (sequence, position, item), then
            # sum over positions; j must tile (not repeat) to stay aligned
            # with the row-major flattening of sub_sequences
            i = np.arange(sub_sequences.shape[0]).repeat(sub_sequences.shape[1])
            j = np.tile(np.arange(sub_sequences.shape[1]),
                        sub_sequences.shape[0])
            k = sub_sequences.reshape(-1) - 1

            matrix[i, j, k] += 1
            matrix = matrix.sum(axis=1)

            probs = lda.transform(matrix)
            train_probs[start:start + args.lda_batch_size, :] = probs

        # calculate test_probs
        n_sequences = train.test_sequences.sequences.shape[0]
        test_probs = np.zeros((n_sequences, args.topic_num))
        n_batches = ceil(n_sequences / args.lda_batch_size)
        for n in range(n_batches):
            start = n * args.lda_batch_size
            sub_sequences = train.test_sequences.sequences[start:start +
                                                           args.lda_batch_size]
            matrix = np.zeros((*sub_sequences.shape, train.num_items - 1),
                              dtype=np.float32)

            i = np.arange(sub_sequences.shape[0]).repeat(sub_sequences.shape[1])
            j = np.tile(np.arange(sub_sequences.shape[1]),
                        sub_sequences.shape[0])
            k = sub_sequences.reshape(-1) - 1

            matrix[i, j, k] += 1
            matrix = matrix.sum(axis=1)

            probs = lda.transform(matrix)
            test_probs[start:start + args.lda_batch_size, :] = probs

        train.sequences.probs = train_probs.astype(np.float32)
        train.test_sequences.probs = test_probs.astype(np.float32)

        with open(topic_path, 'wb') as f:
            pickle.dump((train, test, user_items, item_probs),
                        f,
                        protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(topic_path, 'rb') as f:
            train, test, user_items, item_probs = pickle.load(f)

    return train, test, user_items, item_probs
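The scatter-then-sum in `build_data` materializes a (batch, L, num_items) one-hot tensor just to collapse it again. An equivalent, much lighter formulation counts directly into a 2-D matrix with `np.add.at`, which also accumulates repeated items without relying on fancy-indexing buffering (a sketch, with the same assumed 1-based item ids):

import numpy as np


def bag_of_items(sub_sequences, n_items):
    # per-sequence item counts, equal to the summed one-hot tensor above
    counts = np.zeros((sub_sequences.shape[0], n_items), dtype=np.float32)
    rows = np.arange(sub_sequences.shape[0]).repeat(sub_sequences.shape[1])
    np.add.at(counts, (rows, sub_sequences.reshape(-1) - 1), 1.0)
    return counts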
Example #10
subgraphs_mapping_i, subgraphs_G, subgraphs_mapping_u = hgut.subgraph_con(
    train_set, data_time[0], data_time[-2])

subgraphs_mapping_i, reversed_subgraphs_mapping_i, sorted_time, subgraphs_sequence_i, reversed_subgraphs_mapping_last_i = hgut.subgraph_key_building(
    subgraphs_mapping_i, num_items)

subgraphs_mapping_u, reversed_subgraphs_mapping_u, sorted_time_u, subgraphs_sequence_u, reversed_subgraphs_mapping_last_u = hgut.subgraph_key_building(
    subgraphs_mapping_u, num_users)

assert sorted_time == sorted_time_u

train_data = Interactions(train_set, data_time[0], num_users, num_items,
                          sorted_time)
train_data.to_sequence(subgraphs_mapping_i, subgraphs_mapping_u,
                       subgraphs_sequence_i, subgraphs_sequence_u,
                       args.seq_len, args.T)

# for BPR pretraining
bpr_tuples = list(zip(train_data.user_ids, train_data.item_ids))
train_dic = {}
for i in train_set:
    train_dic[i] = set(train_set[i])

neg_test_dy = np.zeros((num_users, 101), dtype=np.int64)
for i in range(1, num_users):
    for j in range(101):
        neg_test_dy[i][j] = subgraphs_sequence_i[neg_test[i][j]][-1]

print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print(args)