def main():

    args = get_args()
    data_path = os.path.join(args.iobasedir, 'processed/downloads',
                             args.data_set)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(args.iobasedir, 'logs', 'UB.log')
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]

        docs, refs = load_data(data_file)
        if not refs:
            continue

        if not args.summary_size:
            summary_size = len(' '.join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        algos = ['UB1', 'UB2']
        for algo in algos:
            get_summary_scores(algo, docs, refs, summary_size, args.language,
                               rouge)  # assumes a module-level `rouge` scorer

        logger.info('###')
# Example 2
def main():

    args = get_args()
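    # Resolve the ROUGE-1.5.5 Perl toolkit directory relative to this
    # script's grandparent directory.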
    rouge_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'rouge/RELEASE-1.5.5/')

    data_path = os.path.join(args.iobasedir, args.data_setpath)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(
        args.iobasedir, 'logs',
        'baselines_%s_%s.log' % (args.data_set, args.summary_size))
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]

        try:
            docs, refs = load_data(data_file)
        except Exception:
            logger.exception('Failed to load %s, skipping', data_file)
            continue
        if not refs:
            continue

        if not args.summary_size:
            summary_size = len(" ".join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

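        # A fresh ROUGE scorer is created per topic; its temp files are
        # removed after scoring via the (private) cleanup call below.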
        rouge = Rouge(rouge_dir)
        algos = ['UB1', 'UB2', 'ICSI', 'Luhn', 'LexRank', 'LSA', 'KL']
        for algo in algos:
            get_summary_scores(algo, docs, refs, summary_size, args.language,
                               rouge)
        rouge._cleanup()
        logger.info('###')
def test():
    # Load data
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    with open(Config().NEG_SAMPLES, 'rb') as handle:
        neg_samples = pickle.load(handle)

    # Load model
    dr_model = torch.load(MODEL_DIR)  # MODEL_DIR: saved-model path defined outside this excerpt

    dr_model.eval()

    item_embedding = dr_model.encode.weight
    hidden = dr_model.init_hidden(Config().batch_size)
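    # initial recurrent hidden state, sized for one batch and reused below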

    hitratio_numer = 0
    hitratio_denom = 0
    ndcg = 0.0

    for i, x in enumerate(
            dh.batch_iter(train_data,
                          Config().batch_size,
                          Config().seq_len,
                          shuffle=False)):
        uids, baskets, lens = x
        dynamic_user, _ = dr_model(baskets, lens, hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            scores = []
            du_latest = du[l - 1].unsqueeze(0)  # last time step as the user representation

            # Gather this user's positive test items (their <u, p> pairs)
            positives = test_data[test_data['userID'] == uid].baskets.values[0]
            p_length = len(positives)
            positives = torch.LongTensor(positives)

            # Score the positive samples
            scores_pos = list(
                torch.mm(du_latest,
                         item_embedding[positives].t()).data.numpy()[0])
            scores.extend(scores_pos)

            # Score randomly sampled negative items
            negatives = random.sample(list(neg_samples[uid]), Config().neg_num)
            negatives = torch.LongTensor(negatives)
            scores_neg = list(
                torch.mm(du_latest,
                         item_embedding[negatives].t()).data.numpy()[0])
            scores.extend(scores_neg)

            # Calculate hit-ratio: positives occupy indices [0, p_length),
            # since their scores were appended to `scores` first
            index_k = []
            for k in range(Config().top_k):
                index = scores.index(max(scores))
                index_k.append(index)
                scores[index] = -9999  # mask the selected score
            hitratio_numer += len(set(np.arange(0, p_length)) & set(index_k))
            hitratio_denom += p_length

            # Calculate NDCG
            u_dcg = 0
            u_idcg = 0
            for k in range(Config().top_k):
                if index_k[k] < p_length:  # indices below p_length are positives
                    u_dcg += 1 / math.log(k + 2, 2)
                u_idcg += 1 / math.log(k + 2, 2)
            ndcg += u_dcg / u_idcg
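            # Note: u_idcg assumes an ideal ranking with a hit in every top_k
            # slot, so this NDCG is conservative when p_length < top_k.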

    hitratio = hitratio_numer / hitratio_denom
    ndcg = ndcg / len(train_data)  # assumes train_data holds one row per user
    print('Hit ratio[{0}]: {1}'.format(Config().top_k, hitratio))
    print('NDCG[{0}]: {1}'.format(Config().top_k, ndcg))
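
# The hit-ratio / NDCG logic above recurs across several of these snippets. As
# a minimal standalone sketch of the same computation (`ranking_metrics` is a
# hypothetical helper, not part of the original code; it assumes `math` is
# imported, as the snippets do):
def ranking_metrics(index_k, p_length):
    """Hit count and NDCG for a ranked top-k index list in which the indices
    [0, p_length) denote positive items."""
    hits = len(set(range(p_length)) & set(index_k))
    dcg = sum(1 / math.log(k + 2, 2)
              for k, idx in enumerate(index_k) if idx < p_length)
    # IDCG follows the snippets' convention: an ideal ranking hits every slot
    idcg = sum(1 / math.log(k + 2, 2) for k in range(len(index_k)))
    return hits, dcg / idcg
# Usage, e.g.: hits, user_ndcg = ranking_metrics(index_k, p_length)
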
def train():
    # Load data
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Validation data processing...")
    validation_data = dh.load_data(Config().VALIDATIONSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    with open(Config().NEG_SAMPLES, 'rb') as handle:
        neg_samples = pickle.load(handle)

    # Model config
    model = DRModel(Config())

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=Config().learning_rate)

    def bpr_loss(uids, baskets, dynamic_user, item_embedding):
        """
        Bayesian personalized ranking loss for implicit feedback.
        For an intro on BPR: https://towardsdatascience.com/recommender-system-using-bayesian-personalized-ranking-d30e98bba0b9

        Args:
            uids: batch of users' ID
            baskets: batch of users' baskets
            dynamic_user: batch of users' dynamic representations
            item_embedding: item_embedding matrix
        """
        loss = 0
        for uid, bks, du in zip(uids, baskets, dynamic_user):
            du_p_product = torch.mm(
                du, item_embedding.t())  # shape: [pad_len, num_item]
            loss_u = []  # loss for user
            for t, basket_t in enumerate(bks):
                # skip padding baskets (first item 0); t == 0 is skipped since
                # basket t is scored from the representation at step t - 1
                if basket_t[0] != 0 and t != 0:
                    pos_idx = torch.LongTensor(basket_t)

                    # Sample negative products
                    neg = random.sample(list(neg_samples[uid]), len(basket_t))
                    neg_idx = torch.LongTensor(neg)

                    # Score p(u, t, v > v')
                    score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]

                    # Average Negative log likelihood for basket_t
                    loss_u.append(torch.mean(-torch.nn.LogSigmoid()(score)))
            for i in loss_u:
                loss = loss + i / len(loss_u)
        avg_loss = torch.div(loss, len(baskets))
        return avg_loss

    def train_model():
        model.train()  # turn on training mode for dropout
        dr_hidden = model.init_hidden(Config().batch_size)
        train_loss = 0
        start_time = time.time()
        num_batches = ceil(len(train_data) / Config().batch_size)
        for i, x in enumerate(
                dh.batch_iter(train_data,
                              Config().batch_size,
                              Config().seq_len,
                              shuffle=True)):
            # baskets are padded to seq_len (12) with [0]; lens holds each
            # user's true number of baskets
            uids, baskets, lens = x
            model.zero_grad()  # gradients accumulate across backward() calls unless zeroed
            dynamic_user, _ = model(baskets, lens, dr_hidden)

            loss = bpr_loss(uids, baskets, dynamic_user, model.encode.weight)
            loss.backward()

            # Clip to avoid gradient exploding
            torch.nn.utils.clip_grad_norm_(model.parameters(), Config().clip)

            # Parameter updating
            optimizer.step()
            train_loss += loss.data

            # Logging
            if i % Config().log_interval == 0 and i > 0:
                elapsed = (time.time() -
                           start_time) * 1000 / Config().log_interval  # ms/batch
                cur_loss = train_loss.item() / Config().log_interval  # tensor -> float
                train_loss = 0
                start_time = time.time()
                logger.info(
                    '[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, i, num_batches, elapsed, cur_loss))

    def validate_model():
        model.eval()
        dr_hidden = model.init_hidden(Config().batch_size)
        val_loss = 0
        start_time = time.time()
        num_batches = ceil(len(validation_data) / Config().batch_size)
        for i, x in enumerate(
                dh.batch_iter(validation_data,
                              Config().batch_size,
                              Config().seq_len,
                              shuffle=False)):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            loss = bpr_loss(uids, baskets, dynamic_user, model.encode.weight)
            val_loss += loss.data

        # Logging
        elapsed = (time.time() - start_time) * 1000 / num_batches
        val_loss = val_loss.item() / num_batches
        logger.info(
            '[Validation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.4f} |'.
            format(epoch, elapsed, val_loss))
        return val_loss

    def test_model():
        model.eval()
        item_embedding = model.encode.weight
        dr_hidden = model.init_hidden(Config().batch_size)

        hitratio_numer = 0
        hitratio_denom = 0
        ndcg = 0.0

        for i, x in enumerate(
                dh.batch_iter(train_data,
                              Config().batch_size,
                              Config().seq_len,
                              shuffle=False)):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            for uid, l, du in zip(uids, lens, dynamic_user):
                scores = []
                # we use the last output as user representation
                du_latest = du[l - 1].unsqueeze(0)

                # Gather this user's positive test items
                positives = test_data[test_data['userID'] ==
                                      uid].baskets.values[0]  # 1-d list
                p_length = len(positives)
                positives = torch.LongTensor(positives)

                # Score the positive samples
                scores_pos = list(
                    torch.mm(du_latest,
                             item_embedding[positives].t()).data.numpy()[0])
                scores.extend(scores_pos)

                # Score randomly sampled negative items
                negatives = random.sample(list(neg_samples[uid]),
                                          Config().neg_num)
                negatives = torch.LongTensor(negatives)
                scores_neg = list(
                    torch.mm(du_latest,
                             item_embedding[negatives].t()).data.numpy()[0])
                scores.extend(scores_neg)

                # Calculate hit-ratio: positives occupy indices [0, p_length),
                # since their scores were appended first
                index_k = []
                for k in range(Config().top_k):
                    index = scores.index(max(scores))
                    index_k.append(index)
                    scores[index] = -9999  # mask the selected score
                hitratio_numer += len(
                    set(np.arange(0, p_length)) & set(index_k))
                hitratio_denom += p_length

                # Calculate NDCG
                u_dcg = 0
                u_idcg = 0
                for k in range(Config().top_k):
                    if index_k[k] < p_length:  # indices below p_length are positives
                        u_dcg += 1 / math.log(k + 2, 2)
                    u_idcg += 1 / math.log(k + 2, 2)
                ndcg += u_dcg / u_idcg

        hit_ratio = hitratio_numer / hitratio_denom
        ndcg = ndcg / len(train_data)
        logger.info(
            '[Test]| Epochs {:3d} | Hit ratio {:02.4f} | NDCG {:05.4f} |'.
            format(epoch, hit_ratio, ndcg))
        return hit_ratio, ndcg

    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    logger.info('Save into {0}'.format(out_dir))
    checkpoint_dir = out_dir + '/model-{epoch:02d}-{hitratio:.4f}-{ndcg:.4f}.model'

    best_hit_ratio = None

    try:
        # Training
        for epoch in range(Config().epochs):
            train_model()
            logger.info('-' * 89)

            val_loss = validate_model()
            logger.info('-' * 89)

            hit_ratio, ndcg = test_model()
            logger.info('-' * 89)

            # Checkpoint
            if best_hit_ratio is None or hit_ratio > best_hit_ratio:
                with open(
                        checkpoint_dir.format(epoch=epoch,
                                              hitratio=hit_ratio,
                                              ndcg=ndcg), 'wb') as f:
                    torch.save(model, f)
                best_hit_ratio = hit_ratio
    except KeyboardInterrupt:
        logger.info('*' * 89)
        logger.info('Early Stopping!')
# Example 5

if __name__ == "__main__":
    # LOAD RAW DATA & WORD VECTORS
    EVAL_DATASET = '../../dataset/PMtask_TestSet.xml'
    MODE = "eval"

    WV_PATH = '../../embeddings/PubMed-w2v.txt'
    WV_DIMS = 200
    MAX_SENT_LENGTH = 45
    MAX_SENTS = 23

    print("loading word embeddings...")
    word2idx, idx2word, embeddings = load_word_vectors(WV_PATH, WV_DIMS, True)

    docs, labels, ids = load_data(EVAL_DATASET, MODE)

    # convert strings to lists of tokens
    print("Tokenizing...")
    docs = [[text_to_word_sequence(sent) for sent in sent_tokenize(doc)]
            for doc in docs]
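    # each doc is now a list of sentences, each a list of word tokens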

    # convert words to word indexes
    print("Vectorizing...")
    docs = [vectorize_doc(doc, word2idx, MAX_SENTS, MAX_SENT_LENGTH)
            for doc in docs]
    docs = numpy.array(docs)

    # LOAD SAVED MODEL
    print("Loading model from disk...", end=" ")
    model_name = "../experiments/task1_hGRU_2017-10-14 17:25:22.hdf5"
# Example 6
def test(saved_file):
    # Load data
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    # with open(Config().NEG_SAMPLES, 'rb') as handle:
    #     neg_samples = pickle.load(handle)
    neg_samples = {}  # unused here; negatives are drawn from item_list below

    item_list = list(range(336))  # all candidate item ids

    # Load model
    MODEL_DIR = dh.load_model_file(saved_file)

    dr_model = torch.load(MODEL_DIR)

    dr_model.eval()

    item_embedding = dr_model.encode.weight
    hidden = dr_model.init_hidden(Config().batch_size)

    hitratio_numer = 0
    hitratio_denom = 0
    hitratio_numer_10 = 0
    hitratio_numer_5 = 0
    ndcg = 0.0
    ndcg_denom = 0
    hitratio_list_5 = []
    hitratio_list_10 = []
    ndcg_list = []

    for i, x in enumerate(
            tqdm(
                dh.batch_iter(train_data,
                              Config().batch_size,
                              Config().seq_len_test,
                              shuffle=False))):
        uids, baskets, lens = x
        dynamic_user, _ = dr_model(baskets, lens, hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            scores = []
            du_latest = du[l - 1].unsqueeze(0)

            # Gather this user's positive test items, dropping the trailing
            # weight-change indicator (see the bpr_loss comment in Example 9)
            positives = test_data[test_data['userID'] ==
                                  uid].baskets.values[0][:-1]
            p_length = len(positives)
            positives = torch.LongTensor(positives)
            print("positives:   ", positives)

            # Score the positive samples
            scores_pos = list(
                torch.mm(du_latest,
                         item_embedding[positives].t()).data.cpu().numpy()[0])
            scores.extend(scores_pos)
            print("scores_pos:   ", scores_pos)

            # Score negative samples drawn from items outside the positives
            # (positives.tolist(): tensor elements hash by identity, not value)
            neg_item_list = list(
                set(item_list).difference(set(positives.tolist())))
            negatives = random.sample(neg_item_list, Config().neg_num)
            negatives = torch.LongTensor(negatives)
            scores_neg = list(
                torch.mm(du_latest,
                         item_embedding[negatives].t()).data.cpu().numpy()[0])
            scores.extend(scores_neg)

            print("scores:   ", scores)

            # Calculate hit-ratio@5 and @10: positives occupy indices
            # [0, p_length), since their scores were appended first
            index_k = []
            for k in range(Config().top_k):
                index = scores.index(max(scores))
                index_k.append(index)
                scores[index] = -9999  # mask the selected score
            print("index_k:   ", index_k)
            hr_5_numer = len(set(np.arange(0, p_length)) & set(index_k[0:5]))
            hr_10_numer = len(set(np.arange(0, p_length)) & set(index_k))
            hitratio_numer_10 += hr_10_numer
            hitratio_numer_5 += hr_5_numer
            hitratio_denom += p_length
            hitratio_list_5.append(hr_5_numer / p_length)
            hitratio_list_10.append(hr_10_numer / p_length)
            # print("hitratio_list_5:   ", hitratio_list_5)
            # print("hitratio_list_10:   ", hitratio_list_10)
            # hitratio_numer += len((set(np.arange(0, p_length)) & set(index_k)))
            # hitratio_denom += p_length

            # Calculate NDCG
            u_dcg = 0
            u_idcg = 0
            for k in range(Config().top_k):
                if index_k[k] < p_length:  # indices below p_length are positives
                    u_dcg += 1 / math.log(k + 2, 2)
                u_idcg += 1 / math.log(k + 2, 2)
            ndcg += u_dcg / u_idcg
            ndcg_denom += 1
            ndcg_list.append(u_dcg / u_idcg)
            # print("ndcg_list:   ", ndcg_list)

    hit_ratio_5 = hitratio_numer_5 / hitratio_denom
    hit_ratio_10 = hitratio_numer_10 / hitratio_denom
    ndcg = ndcg / ndcg_denom
    print('Hit ratio@5: {0} | Hit ratio@10: {1}'.format(
        hit_ratio_5, hit_ratio_10))
    print('NDCG[{0}]: {1}'.format(Config().top_k, ndcg))
    return hitratio_list_5, hitratio_list_10, ndcg_list
# Example 7
CORPUS = 'dataset/PMtask_Triage_TrainingSet.xml'
WV_PATH = 'embeddings/PubMed-w2v.txt'
WV_DIMS = 200
PERSIST = True  # if True, then save the model to disk
####################
MAX_SENT_LENGTH = 45
MAX_SENTS = 23
MODE = "train"  # assumed: MODE is referenced below but defined outside this excerpt

##############################################
# Prepare Data
##############################################
print("loading word embeddings...")
word2idx, idx2word, embeddings = load_word_vectors(WV_PATH, WV_DIMS, True)
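# word2idx / idx2word map between tokens and rows of the `embeddings` matrix
# (vocab size x WV_DIMS)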

print("loading data...")
docs, labels, ids = load_data(CORPUS, mode=MODE)
# word_vectors = load_word_vectors(args.embeddings)

# convert strings to lists of tokens
print("Tokenizing...")
docs = [[text_to_word_sequence(sent) for sent in sent_tokenize(doc)]
        for doc in docs]

# convert words to word indexes
print("Vectorizing...")
docs = [
    vectorize_doc(doc, word2idx, MAX_SENTS, MAX_SENT_LENGTH) for doc in docs
]
docs = numpy.array(docs)
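# docs now stacks one (MAX_SENTS x MAX_SENT_LENGTH) index grid per document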

if MODE == "train":
# Example 8
if __name__ == '__main__':
    out_file = open(args.out, 'w')
    algos = ['UB1', 'UB2', 'LexRank', 'TextRank', 'Luhn', 'ICSI']
    all_algos = ['UB1', 'UB2', 'ICSI', 'LSA', 'KL', 'Luhn', 'LexRank', 'TextRank']
    R1 = {a: .0 for a in all_algos}
    R2 = {a: .0 for a in all_algos}
    Rl = {a: .0 for a in all_algos}
    Rsu = {a: .0 for a in all_algos}
    blog_sum = .0
    for t in types:
        cur_path = args.path + '/' + t + '/'
        file_names = os.listdir(cur_path)
        blog_sum += len(file_names)
        for filename in tqdm(file_names):
            data_file = os.path.join(cur_path, filename)
            docs, refs = load_data(data_file)
            # summary budget: reference length scaled by the args.sum_len ratio
            sum_len = len(' '.join(refs[0]).split(' ')) * args.sum_len
            print('####', filename, '####')
            out_file.write(filename + '\n')
            for algo in algos:
                r1, r2, rl, rsu = get_summary_scores(algo, docs, refs, sum_len)
                print(algo, r1, r2, rl, rsu)
                out_file.write('%s %s %s %s %s\n' % (algo, r1, r2, rl, rsu))
                R1[algo] += r1
                R2[algo] += r2
                Rl[algo] += rl
                Rsu[algo] += rsu
    out_file.close()
    print('Final Results')
    for algo in algos:
        # average each metric over all processed blogs
        R1[algo] /= blog_sum
        R2[algo] /= blog_sum
        Rl[algo] /= blog_sum
        Rsu[algo] /= blog_sum
        print(algo, R1[algo], R2[algo], Rl[algo], Rsu[algo])
# Example 9
def train():
    # Load data
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Validation data processing...")
    validation_data = dh.load_data(Config().VALIDATIONSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    # with open(Config().NEG_SAMPLES, 'rb') as handle:
    #     neg_samples = pickle.load(handle)
    neg_samples = {}  # unused; negatives are sampled from item_list instead

    if torch.cuda.is_available():
        model = FVModel(Config()).cuda()
    else:
        model = FVModel(Config())

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=Config().learning_rate)

    def bpr_loss(uids, baskets, dynamic_user, item_list, item_embedding):
        """
        Bayesian personalized ranking loss for implicit feedback.

        Args:
            uids: batch of users' ID
            baskets: batch of users' baskets; each basket holds one day's food categories
            dynamic_user: batch of users' dynamic representations
            item_embedding: item_embedding matrix
        """
        loss = 0
        for uid, bks, du in zip(uids, baskets, dynamic_user):
            du_p_product = torch.mm(
                du, item_embedding.t())  # shape: [pad_len, num_item]
            loss_u = []  # loss for user
            for t, basket_t in enumerate(bks):
                if basket_t[0] != 0 and t != 0:  # skip padding and the first step
                    # drop the last element: a weight-change indicator, not an item
                    basket_t = basket_t[:-1]
                    pos_idx = torch.LongTensor(basket_t)

                    # Sample negative products
                    neg_item_list = list(
                        set(item_list).difference(set(basket_t)))
                    neg = random.sample(neg_item_list, len(basket_t))
                    neg_idx = torch.LongTensor(neg)

                    # Score p(u, t, v > v')
                    score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]

                    # Average Negative log likelihood for basket_t
                    loss_u.append(torch.mean(-torch.nn.LogSigmoid()(score)))
            for i in loss_u:
                loss = loss + i / len(loss_u)
        # avg_loss = torch.true_divide(loss, len(baskets))
        avg_loss = torch.div(loss, len(baskets))
        return avg_loss

    def train_model():
        model.train()  # turn on training mode for dropout
        dr_hidden = model.init_hidden(Config().batch_size)
        train_loss = 0
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        num_batches = ceil(len(train_data) / Config().batch_size)
        for i, x in enumerate(
                tqdm(
                    dh.batch_iter(train_data,
                                  Config().batch_size,
                                  Config().seq_len_train,
                                  shuffle=True))):
            uids, baskets, lens = x
            model.zero_grad()  # gradients accumulate across backward() calls unless zeroed
            dynamic_user, _ = model(baskets, lens, dr_hidden)

            loss = bpr_loss(uids, baskets, dynamic_user, item_list,
                            model.encode.weight)
            loss.backward()

            # Clip to avoid gradient exploding
            torch.nn.utils.clip_grad_norm_(model.parameters(), Config().clip)

            # Parameter updating
            optimizer.step()
            train_loss += loss.data

            # Logging
            if i % Config().log_interval == 0 and i > 0:
                elapsed = (time.perf_counter() -
                           start_time) * 1000 / Config().log_interval  # ms/batch
                cur_loss = train_loss.item() / Config().log_interval  # tensor -> float
                train_loss = 0
                start_time = time.perf_counter()
                logger.info(
                    '[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, i, num_batches, elapsed, cur_loss))

    def validate_model():
        model.eval()
        dr_hidden = model.init_hidden(Config().batch_size)
        val_loss = 0
        start_time = time.perf_counter()
        num_batches = ceil(len(validation_data) / Config().batch_size)
        for i, x in enumerate(
                tqdm(
                    dh.batch_iter(validation_data,
                                  Config().batch_size,
                                  Config().seq_len_valid,
                                  shuffle=False))):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            loss = bpr_loss(uids, baskets, dynamic_user, item_list,
                            model.encode.weight)
            val_loss += loss.data

        # Logging
        elapsed = (time.perf_counter() - start_time) * 1000 / num_batches
        val_loss = val_loss.item() / num_batches
        logger.info(
            '[Validation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.4f} |'.
            format(epoch, elapsed, val_loss))
        return val_loss

    def test_model():
        model.eval()
        item_embedding = model.encode.weight
        dr_hidden = model.init_hidden(Config().batch_size)

        hitratio_numer = 0
        hitratio_denom = 0
        hitratio_numer_10 = 0
        hitratio_numer_5 = 0
        ndcg = 0.0
        ndcg_denom = 0

        for i, x in enumerate(
                tqdm(
                    dh.batch_iter(train_data,
                                  Config().batch_size,
                                  Config().seq_len_test,
                                  shuffle=False))):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            for uid, l, du in zip(uids, lens, dynamic_user):
                scores = []
                du_latest = du[l - 1].unsqueeze(0)

                # Gather this user's positive test items, dropping the trailing
                # weight-change indicator
                positives = test_data[test_data['userID'] ==
                                      uid].baskets.values[0][:-1]
                p_length = len(positives)
                positives = torch.LongTensor(positives)

                # Score the positive samples
                scores_pos = list(
                    torch.mm(
                        du_latest,
                        item_embedding[positives].t()).data.cpu().numpy()[0])
                scores.extend(scores_pos)

                # Score negative samples drawn from items outside the positives
                # (positives.tolist(): tensor elements hash by identity, not value)
                neg_item_list = list(
                    set(item_list).difference(set(positives.tolist())))
                negatives = random.sample(neg_item_list, Config().neg_num)
                negatives = torch.LongTensor(negatives)
                scores_neg = list(
                    torch.mm(
                        du_latest,
                        item_embedding[negatives].t()).data.cpu().numpy()[0])
                scores.extend(scores_neg)

                # Calculate hit-ratio@5 and @10: positives occupy indices
                # [0, p_length), since their scores were appended first
                index_k = []  # ranking list
                for k in range(Config().top_k):
                    index = scores.index(max(scores))  # highest remaining score
                    index_k.append(index)
                    scores[index] = -9999  # mask the selected score
                hitratio_numer_10 += len(
                    set(np.arange(0, p_length)) & set(index_k))
                hitratio_numer_5 += len(
                    set(np.arange(0, p_length)) & set(index_k[0:5]))
                hitratio_denom += p_length

                # Calculate NDCG
                u_dcg = 0
                u_idcg = 0
                for k in range(Config().top_k):
                    if index_k[k] < p_length:  # indices below p_length are positives
                        u_dcg += 1 / math.log(k + 2, 2)
                    u_idcg += 1 / math.log(k + 2, 2)
                ndcg += u_dcg / u_idcg
                ndcg_denom += 1

        hit_ratio_5 = hitratio_numer_5 / hitratio_denom
        hit_ratio_10 = hitratio_numer_10 / hitratio_denom
        ndcg = ndcg / ndcg_denom
        logger.info(
            '[Test]| Epochs {:3d} | Hit ratio@5 {:02.4f} | Hit ratio@10 {:02.4f} | NDCG {:05.4f} |'
            .format(epoch, hit_ratio_5, hit_ratio_10, ndcg))
        return hit_ratio_5, hit_ratio_10, ndcg

    timestamp = str(int(time.time()))
    out_dir = os.path.join(os.path.curdir, "runs", timestamp)
    item_list = list(range(336))  # all candidate item (food-category) ids
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    logger.info('Save into {0}'.format(out_dir))
    checkpoint_dir = out_dir + '/model-{epoch:02d}-{hitratio:.4f}-{ndcg:.4f}.model'

    best_hit_ratio = None

    # ==================== test
    # val_loss = validate_model()

    try:
        # Training
        for epoch in range(Config().epochs):
            train_model()
            logger.info('-' * 89)

            val_loss = validate_model()
            logger.info('-' * 89)

            hit_ratio_5, hit_ratio_10, ndcg = test_model()
            logger.info('-' * 89)

            # Checkpoint
            if best_hit_ratio is None or hit_ratio_10 > best_hit_ratio:
                with open(
                        checkpoint_dir.format(epoch=epoch,
                                              hitratio=hit_ratio_10,
                                              ndcg=ndcg), 'wb') as f:
                    torch.save(model, f)
                best_hit_ratio = hit_ratio_10
    except KeyboardInterrupt:
        logger.info('*' * 89)
        logger.info('Early Stopping!')