Example #1
    os.makedirs(args.dataset + '_log', exist_ok=True)
    if args.debug:
        f = open(os.path.join(args.dataset + '_log', 'debug_log.txt'), 'w')
        f.write('\n'.join([str(k) + ',' + str(v) for k, v in sorted(vars(args).items(), key=lambda x: x[0])]))
        f.write('\n')
    else:
        f = open(os.path.join(args.dataset + '_log', 'exp_log.txt'), 'w')
        f.write('\n'.join([str(k) + ',' + str(v) for k, v in sorted(vars(args).items(), key=lambda x: x[0])]))
        f.write('\n')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    sess = tf.Session(config=config)

    sampler = WarpSampler(sequences_train, contexts_train,
                          usernum, itemnum, context_dim,
                          batch_size=args.batch_size, maxlen=args.maxlen, n_workers=12)

    model = Model(usernum, itemnum, context_dim, args)
    sess.run(tf.initialize_all_variables())

    T = 0.0

    seqs, contexts, pos_contexts, test_items, valid_users = prepare_test_data(dataset=dataset, args=args)
    seqs_2, contexts_2, pos_contexts_2, test_items_2, valid_users_2 = prepare_valid_data(dataset=dataset, args=args)
    sample_seq = seqs[7]
    sample_context = contexts[7]
    sample_pos_context = pos_contexts[7]
    sample_test_item = test_items[7]
    sample_test_user = valid_users[7]
    i = 0
Example #2
dataset = data_partition(args.dataset)
[user_train, user_valid, user_test, usernum, itemnum] = dataset
num_batch = len(user_train) // args.batch_size
cc = 0.0
for u in user_train:
    cc += len(user_train[u])
print('average sequence length: %.2f' % (cc / len(user_train)))

f = open(os.path.join(args.dataset + '_' + args.train_dir, 'log.txt'), 'w')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

sampler = WarpSampler(user_train, usernum, itemnum, batch_size=args.batch_size, maxlen=args.maxlen, n_workers=3)
valid_sampler = WarpSampler(user_valid, usernum, itemnum, batch_size=usernum, maxlen=args.maxlen, n_workers=1)
test_sampler = WarpSampler(user_test, usernum, itemnum, batch_size=usernum, maxlen=args.maxlen, n_workers=1)

# import IPython; IPython.embed()
model = Model(usernum, itemnum, args)
sess.run(tf.initialize_all_variables())

T = 0.0
t0 = time.time()

try:
    for epoch in range(1, args.num_epochs + 1):
        losses = []
        for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b', desc='Epoch=%d'%epoch):
            u, seq, pos, neg = sampler.next_batch()
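Every snippet above and below assumes a WarpSampler that produces (u, seq, pos, neg) batches from background worker processes. The following is a minimal hedged sketch of that interface, assuming SASRec-style sequence sampling; the sampling logic, argument names, and queue sizes are assumptions, not the implementation used by any of these examples.

# Hedged sketch of a WarpSampler-like batch producer; not taken from the examples in this listing.
import numpy as np
from multiprocessing import Process, Queue


def sample_function(user_train, usernum, itemnum, batch_size, maxlen, result_queue, seed):
    """Continuously put (u, seq, pos, neg) batches onto a shared queue."""
    np.random.seed(seed)
    while True:
        batch = []
        for _ in range(batch_size):
            u = np.random.randint(1, usernum + 1)
            while len(user_train.get(u, [])) <= 1:        # skip users with too little history
                u = np.random.randint(1, usernum + 1)
            seq = np.zeros(maxlen, dtype=np.int32)        # input item sequence
            pos = np.zeros(maxlen, dtype=np.int32)        # next-item targets
            neg = np.zeros(maxlen, dtype=np.int32)        # sampled negatives
            nxt, idx, ts = user_train[u][-1], maxlen - 1, set(user_train[u])
            for i in reversed(user_train[u][:-1]):
                seq[idx], pos[idx] = i, nxt
                neg_i = np.random.randint(1, itemnum + 1)
                while neg_i in ts:                        # reject items the user already interacted with
                    neg_i = np.random.randint(1, itemnum + 1)
                neg[idx] = neg_i
                nxt, idx = i, idx - 1
                if idx == -1:
                    break
            batch.append((u, seq, pos, neg))
        result_queue.put(tuple(zip(*batch)))


class WarpSampler(object):
    def __init__(self, user_train, usernum, itemnum, batch_size=128, maxlen=50, n_workers=1):
        self.result_queue = Queue(maxsize=n_workers * 10)
        self.processors = []
        for _ in range(n_workers):
            p = Process(target=sample_function,
                        args=(user_train, usernum, itemnum, batch_size, maxlen,
                              self.result_queue, np.random.randint(int(2e9))))
            p.daemon = True
            p.start()
            self.processors.append(p)

    def next_batch(self):
        return self.result_queue.get()

    def close(self):
        for p in self.processors:
            p.terminate()
            p.join()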
Example #3
    n_users, n_items = user_item_matrix.shape

    # make feature as dense matrix
    dense_features = None
    Ks_test_recalls = np.zeros([10, 6])
    Ks_test_precisions = np.zeros([10, 6])
    Aupr_values = np.zeros([10, 1])

    for num in range(1, 11):
        print("Experiment num ", num, "\n")
        np.random.seed(num)

        train, valid, test = split_data(user_item_matrix,
                                        split_ratio=(3, 1, 1))
        sampler = WarpSampler(train,
                              batch_size=BATCH_SIZE,
                              n_negative=N_NEGATIVE,
                              check_negative=True)

        model = CML(
            n_users,
            n_items,
            # enable feature projection
            features=dense_features,
            assmat=train,
            embed_dim=EMBED_DIM,
            margin=2.0,
            clip_norm=1.1,
            master_learning_rate=0.1,
            # the size of the hidden layer in the feature projector NN
            hidden_layer_dim=512,
            # dropout rate between hidden layer and output layer in the feature projector NN
Example #4
    for r in Item[item]['related']:
        Item[item]['related'][r] = set(Item[item]['related'][r])

    item_i_mask_list = []
    item_i_mask = np.zeros([num_rel + 1])
    for r in Item[item]['related'].keys():
        if len(Item[item]['related'][r]) != 0:
            item_i_mask[invRelationships[r] + 1] = 1.0
            item_i_mask_list.append(invRelationships[r] + 1)
    item_i_mask[0] = 1.0
    item_i_mask_list.append(0)
    Item[item]['mask'] = item_i_mask
    Item[item]['mask_list'] = item_i_mask_list

# define sampler with multi-processing
sampler = WarpSampler(user_train, user_train_set, Item, usernum, itemnum, Relationships, batch_size=args.batch_size,
                      n_workers=4)
valid_sampler = WarpSampler(user_train, user_train_set, Item, usernum, itemnum, Relationships,
                            batch_size=args.batch_size, is_test=True, User_test=user_validation, n_workers=2)
test_sampler = WarpSampler(user_train, user_train_set, Item, usernum, itemnum, Relationships,
                           batch_size=args.batch_size, is_test=True, User_test=user_test, n_workers=2)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

model = MoHR(usernum, itemnum, Relationships, args)
sess.run(tf.initialize_all_variables())

best_valid_auc = 0.5
best_iter = 0
num_batch = oneiteration // args.batch_size
Example #5
    def train(self):

        print('train start')
        #train, valid, artists_dic, albums_dic, titles, titles_len = spotify(self.mode, self.valid)
        sampler = WarpSampler(self.train_data,
                              self.unigram_probs,
                              batch_size=self.batch_size,
                              n_negative=self.n_negative,
                              check_negative=True)

        # sample some users to calculate recall validation
        items = self.from_numpy(np.arange(self.n_items))
        prev_recall = 0
        recall_score = 0.0000001
        prev_ndcg = 0
        ndcg_score = 0.0000001
        epoch = 1
        while epoch <= self.num_epochs:

            self.model.train()
            if prev_recall < recall_score and prev_ndcg < ndcg_score:

                prev_recall = recall_score
                prev_ndcg = ndcg_score
            self.save_model()
            print('Model saved')

            # train model
            losses = []
            # run n mini-batches

            #try:
            #    for obj in gc.get_objects():
            #        if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
            #            print(type(obj), obj.size())
            #except:
            #    pass
            #self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
            for i in tqdm(range(self.evaluation_every_n_batchs),
                          desc="Optimizing..."):

                user_pos, neg = sampler.next_batch()
                pos_artists = self.from_numpy(self.artists_dic[user_pos[:, 1]])
                pos_albums = self.from_numpy(self.albums_dic[user_pos[:, 1]])
                neg_artists = self.from_numpy(
                    np.array([
                        self.artists_dic[negative_sample]
                        for negative_sample in neg
                    ])).type(torch.long)
                neg_albums = self.from_numpy(
                    np.array([
                        self.albums_dic[negative_sample]
                        for negative_sample in neg
                    ])).type(torch.long)
                titles = None  #self.from_numpy(self.titles[user_pos[:, 0]])
                titles_len = None  #self.from_numpy(self.titles_len[user_pos[:, 0]])
                user_pos = self.from_numpy(user_pos).type(torch.long)
                neg = self.from_numpy(neg).type(torch.long)
                self.model.zero_grad()
                pos_distances, distance_to_neg_items, closest_negative_item_distances = self.model(
                    user_pos, pos_artists, pos_albums, neg, neg_artists,
                    neg_albums, titles,
                    titles_len)  # / (self.optim_size / self.batch_size)
                loss = self.get_loss(pos_distances, distance_to_neg_items,
                                     closest_negative_item_distances)
                loss.backward(retain_graph=False)
                #torch.nn.utils.clip_grad_norm(self.model.parameters(), self.clip_norm)
                self.optimizer.step()
                self.clip_by_norm(self.model.module.user_embeddings)
                self.clip_by_norm(self.model.module.item_embeddings)
                self.clip_by_norm(self.model.module.artist_embeddings)
                self.clip_by_norm(self.model.module.album_embeddings)
                #self.clip_by_norm(self.model.title_embeddings)
                #self.model.title_embeddings.weight.data = self.clip_by_norm(self.model.title_embeddings.weight.data)
                #if (i+1) % (self.optim_size / self.batch_size) == 0 or i == self.evaluation_every_n_batchs-1:
                #    self.optimizer.step()
                #    self.model.zero_grad()

                losses.append(loss.detach().cpu().numpy())

            torch.cuda.empty_cache()
            print("\nTraining loss {}".format(np.mean(losses)))
            epoch += 1

            # compute recall in chunks to utilize speedup provided by Tensorflow
            artists = self.from_numpy(self.artists_dic)
            albums = self.from_numpy(self.albums_dic)
            titles = self.from_numpy(self.titles)
            titles_len = self.from_numpy(self.titles_len)
            #ratios = self.from_numpy(self.ratios)
            self.model.eval()

            for i in range(10):
                # create evaluator on validation set
                validation_recall = RecallEvaluator(self.model,
                                                    self.train_data,
                                                    self.valid_data[i])
                # compute recall on validate set
                valid_recalls = []
                valid_ndcgs = []
                valid_users = np.array(
                    list(set(self.valid_data[i].nonzero()[0])), )
                #valid_users = list(set(self.valid_data[i].nonzero()[0]))
                for user_chunk in toolz.partition_all(50, valid_users):
                    user_chunk = self.from_numpy(np.array(user_chunk)).type(
                        torch.long)
                    recall, ndcg = validation_recall.eval(
                        user_chunk, items, artists, albums, titles[user_chunk],
                        titles_len[user_chunk], None)
                    valid_recalls.extend(recall)
                    valid_ndcgs.extend(ndcg)

                #for user_chunk in valid_users:
                #items = np.array(self.val_candidates[user_chunk])
                #if len(items) > 50:
                #artists = self.from_numpy(self.artists_dic[items]).type(torch.long)
                #albums = self.from_numpy(self.albums_dic[items]).type(torch.long)
                #items = self.from_numpy(items).type(torch.long)
                #user_chunk = self.from_numpy(np.array([user_chunk])).type(torch.long)
                #recall, ndcg = validation_recall.eval(user_chunk, items, artists, albums, titles[user_chunk], titles_len[user_chunk], ratios[user_chunk-990000])
                #valid_recalls.extend([recall])
                #valid_ndcgs.extend([ndcg])
                #else:
                #print(len(items))
                #valid_recalls.extend([[0.0]])
                #valid_ndcgs.extend([[0.0]])

                recall_score = np.mean(valid_recalls)
                ndcg_score = np.mean(valid_ndcgs)
                print('\nNo. {}'.format(i))
                print("Recall on (sampled) validation set: {}".format(
                    recall_score))
                print(
                    "Ndcg on (sampled) validation set: {}".format(ndcg_score))
            print('Epoch: {}'.format(epoch))
        torch.cuda.empty_cache()
        self.predict()
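Example #5 calls self.clip_by_norm(...) on each embedding table after every optimizer step but does not show that helper. Below is a minimal sketch of what it might do, assuming the goal is to keep every embedding row inside an L2 ball of a given radius; the name, signature, and default radius are assumptions.

# Hypothetical clip_by_norm helper (assumed, not from Example #5).
import torch

def clip_by_norm(embedding, max_norm=1.0):
    """Rescale embedding rows whose L2 norm exceeds max_norm."""
    with torch.no_grad():
        weight = embedding.weight if hasattr(embedding, "weight") else embedding
        norms = weight.norm(p=2, dim=1, keepdim=True)               # per-row norms, shape (n, 1)
        scale = torch.clamp(max_norm / (norms + 1e-12), max=1.0)    # shrink only rows outside the ball
        weight.mul_(scale)                                          # in-place projection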
Example #6

if __name__ == '__main__':
    args = parse_args()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    np.random.seed(2017)
    random_seed = 2017
    tf.compat.v1.set_random_seed(random_seed)

    print("%s reading dataset %s" % (datetime.now(), args.dataset))
    train, valid = dataset_to_uimatrix(args.train_data, args.test_data)
    n_users, n_items = train.shape
    print("%s #users=%d, #items=%d" % (datetime.now(), n_users, n_items))
    sampler = WarpSampler(train,
                          batch_size=args.batch_size,
                          n_negative=args.num_negative,
                          check_negative=True)

    # WITHOUT features
    # Train a user-item joint embedding, where the items a user likes will be pulled closer to this user.
    # Once the embedding is trained, the recommendations are made by finding the k-Nearest-Neighbor to each user.
    model = CML(
        n_users,
        n_items,
        # set features to None to disable feature projection
        features=None,
        # size of embedding
        embed_dim=args.embed_dim,
        # the size of hinge loss margin.
        margin=args.margin,
        # clip the embedding so that their norm <= clip_norm
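The comments in Example #6 describe inference as a k-nearest-neighbour search between the trained user and item embeddings. Below is a small sketch of that retrieval step, assuming plain NumPy arrays of embeddings and Euclidean distance as the metric (CML ranks by distance, so smaller is better); the function name and arguments are illustrative only.

# Hedged sketch of kNN retrieval from trained embeddings (not part of Example #6).
import numpy as np

def recommend_topk(user_embeddings, item_embeddings, user_id, k=10, exclude=None):
    """Return the k items whose embeddings are closest to the given user's embedding."""
    u = user_embeddings[user_id]                            # (embed_dim,)
    dists = np.linalg.norm(item_embeddings - u, axis=1)     # distance from the user to every item
    if exclude is not None:                                 # e.g. items already seen in training
        dists[list(exclude)] = np.inf
    return np.argsort(dists)[:k]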
Example #7
def main():

    prepare_env()

    dataset, user_train, usernum, itemnum, num_batch = load_dataset()

    sampler = WarpSampler(
        user_train,
        usernum,
        itemnum,
        args=args,
        batch_size=args.batch_size,
        maxlen=args.maxlen,
        threshold_user=args.threshold_user,
        threshold_item=args.threshold_item,
        n_workers=3,
    )

    graph, model, num_experts, expert_paths, global_step, saver = create_model(
        usernum, itemnum, args)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config, graph=graph) as sess:
        sess.run(tf.global_variables_initializer())

        if num_experts > 1:
            for i, path in enumerate(
                    expert_paths):  # restore experts' variables
                restore_collection(path, "expert_{}".format(i), sess, graph)

        best_result = 0.0
        best_res_path = os.path.join(args.train_dir, args.best_res_log)
        if os.path.isfile(best_res_path):
            with open(best_res_path, 'r') as inf:
                best_result = float(inf.readline().strip())
        best_step = 0
        no_improve = 0
        save_path = tf.train.latest_checkpoint(args.train_dir)
        if save_path:
            saver.restore(sess, save_path)
            print("[restored] {}".format(save_path))
        else:
            save_path = saver.save(sess,
                                   os.path.join(args.train_dir, "model.ckpt"),
                                   global_step)
            print("[saved] {}".format(save_path))

        T = 0.0
        t0 = time.time()
        t_valid = evaluate_valid(model, dataset, args, sess)
        print("[init] time = {}, best = {}, eval HR@{} = {}, HR@{} = {}],".
              format(time.time() - t0, best_result, args.k, t_valid[0],
                     args.k1, t_valid[1]))
        if args.std_test:
            t0 = time.time()
            t_test = evaluate(model, dataset, args, sess)
            print(
                "[init] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]"
                .format(time.time() - t0, args.k, t_test[0], args.k1,
                        t_test[1], args.k, t_test[2], args.k1, t_test[3]))

        t0 = time.time()

        for epoch in range(1, args.num_epochs + 1):
            # for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
            total_loss = 0.0
            for step in range(num_batch):
                u, seq, pos, neg = sampler.next_batch()
                if num_experts > 1:
                    log_freq = 100
                    loss, _, global_step_val = sess.run(
                        [model.loss, model.train_op, global_step], {
                            model.u: u,
                            model.input_seq: seq,
                            model.pos: pos,
                            model.is_training: True
                        })
                    if step % log_freq == 0:
                        print("[step-{}] {}/{}, avg_loss = {}".format(
                            global_step_val, step + 1, num_batch,
                            total_loss / log_freq))
                        total_loss = 0.0
                    else:
                        total_loss += loss
                else:
                    user_emb_table, item_emb_table, attention, auc, loss, _, global_step_val = sess.run(
                        [
                            model.user_emb_table, model.item_emb_table,
                            model.attention, model.auc, model.loss,
                            model.train_op, global_step
                        ], {
                            model.u: u,
                            model.input_seq: seq,
                            model.pos: pos,
                            model.neg: neg,
                            model.is_training: True
                        })
                    print("[step-{}] {}/{}, auc = {}, loss = {}".format(
                        global_step_val, step + 1, num_batch, auc, loss))
                sys.stdout.flush()

            if epoch % args.eval_freq == 0:
                t1 = time.time()
                T += t1 - t0
                # t_test = evaluate(model, dataset, args, sess)
                t_valid = evaluate_valid(model, dataset, args, sess)
                t2 = time.time()
                # print("[{0}, {1}, {2}, {3}, {4}, {5}],".format(epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1]))
                print(
                    "[epoch = {}, time = {} (train/eval = {}/{}), HR@{} = {}, HR@{} = {}],"
                    .format(epoch, T, t1 - t0, t2 - t1, args.k, t_valid[0],
                            args.k1, t_valid[1]))
                t0 = t2

                # early stopping
                if t_valid[args.eval_tgt_idx] > best_result:
                    print("[best_result] {} (step-{}) < {} (step-{})".format(
                        best_result, best_step, t_valid[args.eval_tgt_idx],
                        global_step_val))
                    best_result = t_valid[args.eval_tgt_idx]
                    best_step = global_step_val
                    # ckpt_paths = glob(os.path.join(args.train_dir, "model.ckpt*"))
                    # for path in ckpt_paths:
                    #   os.remove(path)
                    #   print("[removed] {}".format(path))
                    with open(best_res_path, 'w') as outf:
                        outf.write("{}".format(best_result))
                    save_path = saver.save(
                        sess, os.path.join(args.train_dir, "model.ckpt"),
                        global_step_val)
                    print("[saved] {}".format(save_path))
                    no_improve = 0
                else:
                    print("[best_result] {} (step-{}) > {} (step-{})".format(
                        best_result, best_step, t_valid[args.eval_tgt_idx],
                        global_step_val))
                    no_improve += args.eval_freq
                    if args.early_stop_epochs < 0:  # turn off early stopping
                        save_path = saver.save(
                            sess, os.path.join(args.train_dir, "model.ckpt"),
                            global_step_val)
                        print("[saved] {}".format(save_path))
                    else:
                        if no_improve >= args.early_stop_epochs:
                            print(
                                "[stop training] no improvement for {} epochs".
                                format(no_improve))
                            break
                sys.stdout.flush()

        if args.std_test:
            t_test = evaluate(model, dataset, args, sess)
            print(
                "[final] time = {}, test NDCG{} = {}, NDCG{} = {}, HR{} = {}, HR{} = {}]"
                .format(time.time() - t0, args.k, t_test[0], args.k1,
                        t_test[1], args.k, t_test[2], args.k1, t_test[3]))

    sampler.close()
    print("[Done]")
Example #8
for user in range(usernum):
    oneiteration += len(user_train[user]['consume'])
    for item in set(user_train[user]['produce']):
        if item in owner:
            print "multiple_creators!"
        owner[item] = user
for item in range(itemnum):
    if item not in owner:
        print "missing creator!"
        break
oneiteration = min(1000000, oneiteration)

sampler = WarpSampler(user_train,
                      user_validation,
                      user_test,
                      owner,
                      usernum,
                      itemnum,
                      batch_size=args.batch_size,
                      n_workers=1)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
model = Model(usernum, itemnum, args)
sess.run(tf.initialize_all_variables())

best_valid_auc = 0.5
best_iter = 0

for i in range(args.maximum_epochs):
    for _ in range(oneiteration // args.batch_size):
        batch = sampler.next_train_batch()
        batch_u, batch_i, batch_j, batch_oi, batch_oj = batch
Example #9
        model = CAST6(usernum, itemnum, ratingnum, args)
    elif args.model == "cast_7":
        model = CAST7(usernum, itemnum, ratingnum, args)
    elif args.model == "cast_8":
        model = CAST8(usernum, itemnum, ratingnum, args)
    elif args.model == "cast_9":
        model = CAST9(usernum, itemnum, ratingnum, args)
    elif args.model == "sasrec":
        model = SASRec(usernum, itemnum, args)
    elif args.model == "sasrec_static":
        model = SASRec(usernum, itemnum, args, static=True)

    # SAMPLER
    print('usernum', usernum, 'itemnum', itemnum)
    sampler = WarpSampler(args, train, usernum, itemnum,
                          sample_func=sample_function,
                          batch_size=args.batch_size, maxlen=args.maxlen, n_workers=1)

    sess.run(tf.global_variables_initializer())

    # Set train dir
    now = datetime.now()
    TRAIN_FILES_PATH = os.path.join(
        MODEL_PATH, os.path.basename(args.dataset), '{}_{}'.format(args.train_dir, now.strftime("%m-%d-%Y-%H-%M-%S")))

    # Allow saving of model
    MODEL_SAVE_PATH = os.path.join(TRAIN_FILES_PATH, 'model.ckpt')
    saver = tf.train.Saver()

    if args.test_model:
        if os.path.exists(args.test_model):
Example #10
num_batch = len(train) // args.batch_size
cc = 0.0
for u in train:
    cc += len(train[u])
print('average sequence length: %.2f' % (cc / len(train)))

f = open(os.path.join(args.dataset + '_' + args.train_dir, 'log.txt'), 'w')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

sampler = WarpSampler(train,
                      listnum,
                      itemnum,
                      creator_list_dict,
                      batch_size=args.batch_size,
                      maxlen=args.maxlen,
                      n_workers=3)
model = Model(creatorNum, itemnum, args)
sess.run(tf.initialize_all_variables())

T = 0.0
t0 = time.time()

threshold = 200

#try:
for epoch in range(1, args.num_epochs + 1):

    for step in tqdm(range(num_batch),
Example #11
def train_sasrec(n_args):
    if not os.path.exists("../../prediction_result/" + n_args.o_filename +
                          ".csv"):
        if not os.path.isdir(n_args.dataset + '_' + n_args.train_dir):
            os.makedirs(n_args.dataset + '_' + n_args.train_dir)
        with open(
                os.path.join(n_args.dataset + '_' + n_args.train_dir,
                             'args.txt'), 'w') as f:
            f.write('\n'.join([
                str(k) + ',' + str(v)
                for k, v in sorted(vars(n_args).items(), key=lambda x: x[0])
            ]))
        f.close()

        dataset = data_partition(n_args.dataset, n_args.p_dataset, None)
        recall_s1 = Get_Recall_S1(n_args.recall_ds)
        # recall_v = Get_Recall_S1(n_args.recall_v)
        [
            user_train, user_valid, user_test, user_pred, user_valid_, usernum,
            itemnum
        ] = dataset
        num_batch = math.ceil(len(user_train) / n_args.batch_size)
        cc = 0.0
        for u in user_train:
            cc += len(user_train[u])
        print('average sequence length: %.2f' % (cc / len(user_train)))

        f = open(
            os.path.join(n_args.dataset + '_' + n_args.train_dir, 'log.txt'),
            'w')
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        sess = tf.Session(config=config)

        sampler = WarpSampler(user_train,
                              usernum,
                              itemnum,
                              batch_size=n_args.batch_size,
                              maxlen=n_args.maxlen,
                              n_workers=4)
        model = Model(usernum, itemnum, n_args)

        if not os.listdir("../user_data/model_data/"):
            sess.run(tf.global_variables_initializer())
            T = 0.0
            t0 = time.time()
            try:
                for epoch in range(1, n_args.num_epochs + 1):
                    for step in tqdm(range(num_batch),
                                     total=num_batch,
                                     ncols=70,
                                     leave=False,
                                     unit='b'):
                        u, seq, pos, neg = sampler.next_batch()
                        auc, loss, _ = sess.run(
                            [model.auc, model.loss, model.train_op], {
                                model.u: u,
                                model.input_seq: seq,
                                model.pos: pos,
                                model.neg: neg,
                                model.is_training: True
                            })

                    if epoch % 20 == 0:
                        t1 = time.time() - t0
                        T += t1
                        print('Evaluating')
                        t_test = evaluate(model, dataset, n_args, sess)
                        t_valid = evaluate_valid(model, dataset, n_args, sess)
                        print('')
                        print(
                            'epoch:%d, time: %f(s), valid (NDCG@50: %.4f, HR@10: %.4f), test (NDCG@50: %.4f, HR@10: %.4f)'
                            % (epoch, T, t_valid[0], t_valid[1], t_test[0],
                               t_test[1]))
                        f.write(str(t_valid) + ' ' + str(t_test) + '\n')
                        f.flush()
                        t0 = time.time()
                saver = tf.train.Saver()
                saver.save(sess, "../user_data/model_data/sasrec_model.ckpt")
                predict_result(model,
                               dataset,
                               recall_s1,
                               n_args,
                               sess,
                               type='pred')
                # predict_result(model, dataset, recall_v, args, sess, type='valid')

            except:
                sampler.close()
                f.close()
                exit(1)
        else:
            saver = tf.train.Saver()
            with tf.Session() as sess:
                saver.restore(sess,
                              "../user_data/model_data/sasrec_model.ckpt")
                predict_result(model,
                               dataset,
                               recall_s1,
                               n_args,
                               sess,
                               type='pred')
                # predict_result(model, dataset, recall_v, args, sess, type='valid')

        f.close()
        sampler.close()
        print("Done")
Example #12
    # get user-item matrix
    # make feature as dense matrix
    args = parse_args()
    Filename = args.dataset
    Filepath = 'Data/' + Filename
    dataset = Dataset.Dataset(Filepath)
    train, test = dataset.trainMatrix, dataset.testRatings
    textualfeatures, imagefeatures = dataset.textualfeatures, dataset.imagefeatures
    # print(type(features))

    n_users, n_items = max(train.shape[0],
                           test.shape[0]), max(train.shape[1], test.shape[1])
    print(n_users, n_items)
    # create warp sampler
    sampler = WarpSampler(train,
                          batch_size=args.batch_size,
                          n_negative=args.num_neg,
                          check_negative=True)

    # WITHOUT features
    # Train a user-item joint embedding, where the items a user likes will be pulled closer to this user.
    # Once the embedding is trained, the recommendations are made by finding the k-Nearest-Neighbor to each user.

    model = MAML(
        n_users,
        n_items,
        # enable feature projection
        imagefeatures=imagefeatures,
        textualfeatures=textualfeatures,
        embed_dim=64,
        batch_size=args.batch_size,
        # N_negatvie
Example #13
                'produce'] or item in user_validation[u][
                    'consume'] or item in user_test[u][
                        'consume'] or item in sample or item not in consumed_items:
            item = np.random.randint(0, itemnum)
        sample.add(item)
        sample_list.append(item)
        owner_list.append(owner[item])

    return sample_list, owner_list


sampler = WarpSampler(user_train,
                      user_validation,
                      user_test,
                      owner,
                      usernum,
                      itemnum,
                      consumed_items,
                      batch_size=args.batch_size,
                      n_workers=1)

best_iter = 0
best_valid_mrr = 1e-6
prev_valid_mrr = 1e-6

f = open(
    'out/%s_%s_%d_%d_%d_%g_%g.txt' %
    (args.dataset, args.alg, args.n_layers, args.emb_dim, args.n_neurons,
     args.lambda1, args.lambda2), 'w')

############# Architecture
Example #14
dataset = data_partition('./data/movielens.txt')
# [user_train, user_valid, user_test, user_num, item_num] = dataset
[user_train, user_valid, user_test, user_num, item_num, item_count,
 cum_table] = dataset
num_batch = len(user_train) // (batch_size)

cc = 0.0
for u in user_train:
    cc += len(user_train[u])
print('average sequence length: {:.2f}'.format(cc / len(user_train)))

sampler = WarpSampler(user_train,
                      user_num,
                      item_num,
                      cum_table,
                      batch_size=batch_size,
                      max_len=max_len,
                      n_workers=3)

model, emb = build_model(max_len=max_len,
                         input_dim=item_num + 1,
                         embedding_dim=50,
                         feed_forward_units=50,
                         head_num=1,
                         block_num=2,
                         dropout_rate=0.2)

optimizer = AdamOptimizer(0.001)
tbcb = TensorBoard(log_dir='/logs',
                   histogram_freq=1,
Example #15
cc = 0.0

# for u in user_train:
#     cc += len(user_train[u])
# print 'average sequence length: %.2f' % (cc / len(user_train))

file_log = "log.txt_%s" % args.train_type
f = open(os.path.join(args.folder_dataset_model, file_log), 'w')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

sampler = WarpSampler(user_train,
                      usernum,
                      itemnum,
                      batch_size=args.batch_size,
                      maxlen=args.max_seq_len,
                      n_workers=3)
model = Model(usernum, itemnum, args)
sess.run(tf.initialize_all_variables())

T = 0.0
t0 = time.time()

best_epoch = 0.
best_valid = [0., 0.]
best_test = [0., 0.]

for epoch in range(1, args.num_epochs + 1):

    epoch_loss = []
Example #16
        for line in open(item_file, 'r')
    }
    total_sample = sum(
        [int(line.split('\t')[2]) for line in open(item_file, 'r')])
    test_sample = sum([
        1 for fn in glob.glob(os.path.join(test_triplet_path, '*'))
        for line in open(fn, 'r')
    ])
    cluster = tf.train.ClusterSpec(cluster_config)
    num_batch = (total_sample - test_sample) // (len(cluster_config['worker']) *
                                                 int(batch_size))
    if task == 'ps':
        parameter_server(cluster, len(item2weight), int(embed_dim), int(idx),
                         int(epoch), item_embedding_file)
    else:
        per_user = int(
            np.ceil(len(user2weight) * 1.0 / len(cluster_config['worker'])))
        user_chunk = list(chunks(range(len(user2weight)), per_user))
        start, end = user_chunk[int(idx)][0], user_chunk[int(idx)][-1]
        sampler = WarpSampler(pos_pair_path,
                              batch_size=int(batch_size),
                              n_workers=int(n_workers),
                              item2weight=item2weight,
                              negative_num=int(negative_num),
                              start=start,
                              end=end)
        evaluator = Evaluator(test_triplet_path, start, end)
        worker(cluster, len(item2weight), int(embed_dim), int(idx), int(epoch),
               float(alpha), float(learning_rate), sampler, evaluator,
               num_batch, end - start + 1)
Example #17
# print 'itemnum: %d' % (itemnum)   # my code
num_batch = len(user_train) // args.batch_size
cc = 0.0
for u in user_train:
    cc += len(user_train[u])
print('average sequence length: %.2f' % (cc / len(user_train)))

f = open(os.path.join(args.dataset + '_' + args.train_dir, 'log.txt'), 'w')
config = tf.ConfigProto()  # configure the session
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU, increase GPU utilization
config.allow_soft_placement = True  # allow TensorFlow to automatically choose an existing and supported device to run the operations
sess = tf.Session(config=config)  # Driver for Graph execution

sampler = WarpSampler(user_train,
                      usernum,
                      itemnum,
                      batch_size=args.batch_size,
                      maxlen=args.maxlen,
                      n_workers=3)
text_emb = np.load(
    'data/reviews_emb.npy')  # my code, loading the text embedding of item
print('The loaded embedding type is {}'.format(text_emb.dtype))  # my code, show loaded embedding's data type
# text_emb = np.random.rand(itemnum+1, 300)   # my code, for text embeddings
# text_emb = tf.random.uniform(shape=[itemnum+1, 300], minval=0, maxval=1, dtype=tf.float32, seed=10)  # my code
model = Model(usernum, itemnum, args)
# sess.run(tf.global_variables_initializer)   # my code
sess.run(tf.initialize_all_variables())

T = 0.0
t0 = time.time()
Example #18
print("\nThere are {0} users {1} items \n".format(usernum, itemnum))
print("Average sequence length: {0}\n".format(cc / len(user_train)))
print("Maximum length of sequence: {0}\n".format(max_len))

f = open(
    os.path.join(args.dataset + '_' + args.train_dir,
                 'log_{}.txt'.format(timestamp)), 'w')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

sampler = WarpSampler(user_train,
                      usernum,
                      itemnum,
                      batch_size=args.batch_size,
                      maxlen=args.maxlen,
                      threshold_user=args.threshold_user,
                      threshold_item=args.threshold_item,
                      n_workers=3)
model = Model(usernum, itemnum, args)
sess.run(tf.global_variables_initializer())

T = 0.0
t_test = evaluate(model, dataset, args, sess)
t_valid = evaluate_valid(model, dataset, args, sess)
print_result(0, 0.0, t_valid, t_test)
print_result(0, 0.0, t_valid, t_test, f=f)
# print("[0, 0.0, {0}, {1}, {2}, {3}],".format(t_valid[0], t_valid[1], t_test[0], t_test[1]))

t0 = time.time()
Example #19
    # negative and positive matrix have the same shape
    n_users, n_items = user_item_matrix.shape
    print(n_users, n_items)
    # make feature as dense matrix
    dense_features = features.toarray() + 1E-10

    # get train/valid/test user-item matrices
    train, valid, test = split_data(user_item_matrix)
    train_exp_neg, valid_exp_neg, test_exp_neg = split_data(
        user_item_exp_neg_matrix)

    # create warp sampler
    sampler = WarpSampler(train,
                          train_exp_neg,
                          batch_size=BATCH_SIZE,
                          n_negative=N_NEGATIVE,
                          check_negative=True)

    # WITHOUT features
    # Train a user-item joint embedding, where the items a user likes will be pulled closer to this user.
    # Once the embedding is trained, the recommendations are made by finding the k-Nearest-Neighbor to each user.

    # @sandro: commented out because it is not needed
    #  model = CML(n_users,
    #             n_items,
    #             # set features to None to disable feature projection
    #             features=None,
    #             # size of embedding
    #             embed_dim=EMBED_DIM,
    #             # the size of hinge loss margin.
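Examples #3, #6 and #19 all pass a margin to CML and describe it as the size of the hinge-loss margin. Below is a hedged sketch of the pairwise objective that margin controls, simplified to the hardest-negative form without rank-based weighting; it is written in NumPy for illustration and is not the library's implementation.

# Hedged sketch of a CML-style triplet hinge loss (assumed simplification).
import numpy as np

def hinge_loss(user_emb, pos_item_emb, neg_item_embs, margin=1.0):
    """max(0, margin + d(u, pos)^2 - d(u, hardest neg)^2), summed over the batch."""
    pos_dist = np.sum((user_emb - pos_item_emb) ** 2, axis=1)                  # (batch,)
    neg_dist = np.sum((user_emb[:, None, :] - neg_item_embs) ** 2, axis=2)     # (batch, n_negative)
    hardest_neg = neg_dist.min(axis=1)                                         # closest negative per pair
    return np.maximum(0.0, margin + pos_dist - hardest_neg).sum()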