Ejemplo n.º 1
0
    ]
    logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                        level=logging.DEBUG,
                        datefmt='%d-%m-%Y %H:%M:%S',
                        handlers=handlers)
    logger.info(args)
    logger.info('Start training!')

    label_map = {'SUPPORTS': 0, 'REFUTES': 1, 'NOT ENOUGH INFO': 2}
    tokenizer = BertTokenizer.from_pretrained(args.bert_pretrain,
                                              do_lower_case=False)
    logger.info("loading training set")
    trainset_reader = DataLoader(args.train_path,
                                 label_map,
                                 tokenizer,
                                 args,
                                 batch_size=args.train_batch_size)
    logger.info("loading validation set")
    validset_reader = DataLoader(args.valid_path,
                                 label_map,
                                 tokenizer,
                                 args,
                                 batch_size=args.valid_batch_size,
                                 test=True)

    logger.info('initializing estimator model')
    bert_model = BertForSequenceEncoder.from_pretrained(args.bert_pretrain)
    ori_model = inference_model(bert_model, args)
    model = nn.DataParallel(ori_model)
    model = model.cuda()
    train_model(model, ori_model, args, trainset_reader, validset_reader)
Ejemplo n.º 2
0
        lambda t, y, s: tf.equal(tf.shape(y)[0], batch_size))
    iterator = dataset.make_initializable_iterator()
    return iterator


# Run configuration for this training script.
batch_size = 32
num_epochs = 200
restore = True  # when set, resume from the newest checkpoint under ../models

# The input file name is supplied at run time through this scalar string
# placeholder, so the iterator can be (re-)initialised with a concrete file.
filename = tf.placeholder(tf.string, shape=[])
iterator = create_iterator(filename, batch_size)

length, token, label = iterator.get_next()

# Both graphs are built from the same input tensors.
# NOTE(review): presumably train_model and inference_model share variables;
# confirm against their definitions.
output = train_model(token, label, length, batch_size)
infer_output = inference_model(token, label, length, batch_size)
# Arg-max over axis 2 of the training logits.
# NOTE(review): assumes `output` has at least three dimensions with the class
# scores on the last one -- confirm.
pred = tf.argmax(output, axis=2)

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                      logits=output)
cost = tf.reduce_mean(loss)
updates = tf.train.AdamOptimizer(1e-4).minimize(cost)

sess = tf.Session()
saver = tf.train.Saver()
# Initialise every variable first; a subsequent restore overwrites the values
# of the variables stored in the checkpoint.
sess.run(tf.global_variables_initializer())

if restore:
    # latest_checkpoint returns None when the directory holds no checkpoint.
    # Fail with an explicit message instead of handing None to saver.restore.
    checkpoint_path = tf.train.latest_checkpoint('../models')
    if checkpoint_path is None:
        raise ValueError(
            "restore is enabled but no checkpoint was found in '../models'")
    saver.restore(sess, checkpoint_path)

for i in range(num_epochs):
Ejemplo n.º 3
0
    handlers = [
        logging.FileHandler(os.path.abspath(args.outdir) + '/train_log.txt'),
        logging.StreamHandler()
    ]
    logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                        level=logging.DEBUG,
                        datefmt='%d-%m-%Y %H:%M:%S',
                        handlers=handlers)
    logger.info(args)
    logger.info('Start training!')

    tokenizer = BertTokenizer.from_pretrained(args.bert_pretrain,
                                              do_lower_case=False)
    logger.info("loading training set")
    reader = DataLoader(args.train_path,
                        tokenizer,
                        args,
                        batch_size=args.train_batch_size)
    logger.info("loading validation set")
    valid_reader = DataLoader(args.valid_path,
                              tokenizer,
                              args,
                              batch_size=args.valid_batch_size)

    bert_model = BertForSequenceEncoder.from_pretrained(args.bert_pretrain)
    bert_model = bert_model.to(device)
    model = inference_model(bert_model, args, device)
    model = model.to(device)

    train(args, model, reader, valid_reader)
Ejemplo n.º 4
0
    args = parser.parse_args()

    if not os.path.exists(args.outdir):
        os.mkdir(args.outdir)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                        level=logging.DEBUG,
                        datefmt='%d-%m-%Y %H:%M:%S')
    logger.info(args)
    logger.info('Start testing!')

    label_map = {'SUPPORT': 0, 'CONTRADICT': 1, 'NOT_ENOUGH_INFO': 2}
    label_list = ['SUPPORT', 'CONTRADICT', 'NOT_ENOUGH_INFO']
    args.num_labels = len(label_map)
    tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
    logger.info("loading validation set")
    validset_reader = DataLoaderTest(args,
                                     label_map,
                                     tokenizer,
                                     batch_size=args.batch_size)
    logger.info('initializing estimator model')
    bert = AutoModel.from_pretrained(args.pretrain).cuda()
    bert = bert.cuda()
    bert.eval()
    model = inference_model(bert, args)
    model.load_state_dict(torch.load(args.checkpoint)['model'])
    model = model.cuda()
    model.eval()
    eval_model(model, label_list, validset_reader, args.outdir, args.name)
Ejemplo n.º 5
0
def train_matrix_factorization_With_Feed_Neural():
    """Train the recommendation model with feed_dict batches and evaluate it.

    The function takes no arguments and returns nothing; every input
    (ratings, interaction dictionaries, sizes and hyper-parameters such as
    ``batch_size``, ``neg_sample_size``, ``epochs``, ``learn_rate``,
    ``show_peroid`` and ``graph_hyper_params``) is read from names defined
    outside this function.

    Per batch it fills preallocated numpy buffers with ``batch_size``
    observed (user, item) pairs labelled 1.0 followed by ``neg_sample_size``
    sampled pairs labelled 0.0, then runs one optimizer step.  At regular
    intervals inside an epoch it prints the running average loss and ranks,
    for every user in ``latest_item_interaction``, that user's held-out item
    against 99 randomly drawn items to compute HR@10 and NDCG@10.  When the
    metrics improve and more than 10 epochs have passed, a checkpoint is
    written to ``checkpoint_prefix``.

    NOTE(review): the body uses the ``print`` statement, ``xrange`` and
    relies on ``/`` between ints yielding an int, i.e. it is Python 2 code.
    """
    top_k = 10
    best_hr = best_ndcg = 0.0
    # Sampler used below to draw the pairs that are labelled 0.0.
    my_sample = MySampler(pruned_all_ratings, u_max_num, v_max_num)

    # ---- Graph inputs: user side ----
    user_id = tf.placeholder(tf.int32, [None, 1], name="user_id")
    u_index = tf.placeholder(tf.int32, [None, user_max_interact],
                             name="u_index")
    u_val = tf.placeholder(tf.float32, [None, 1, user_max_interact],
                           name="u_val")
    u_interact_length = tf.placeholder(tf.int32, [None, 1],
                                       name="u_interact_length")

    # ---- Graph inputs: item side ----
    item_id = tf.placeholder(tf.int32, [None, 1], name="item_id")
    v_index = tf.placeholder(tf.int32, [None, item_max_interact],
                             name="v_index")
    v_val = tf.placeholder(tf.float32, [None, 1, item_max_interact],
                           name="v_val")
    # NOTE(review): the name below repeats "u_interact_length" from the user
    # placeholder; this looks like a copy/paste slip -- confirm before
    # changing, since anything looking tensors up by name would be affected.
    v_interact_length = tf.placeholder(tf.int32, [None, 1],
                                       name="u_interact_length")

    # Target label for each (user, item) row: 1.0 for observed pairs and 0.0
    # for sampled ones (see the batch-filling code below).
    true_u_v = tf.placeholder(tf.float32, [None, 1], name="true_u_v")

    global_step = tf.Variable(0, name="global_step", trainable=False)
    # network_params is returned by inference_model but not used in this
    # function.
    pred_val, model_loss, network_params = inference_model(
        model_name, user_id, u_index, u_val, u_interact_length, item_id,
        v_index, v_val, v_interact_length, v_max_num, u_max_num,
        first_layer_size, last_layer_size, user_max_interact,
        item_max_interact, true_u_v, graph_hyper_params)

    # Pick the optimizer from graph_hyper_params['opt'].  The accepted keys
    # are exactly 'adam', 'adgrad' (sic) and 'adadelta'.
    # NOTE(review): in the else branch train_step stays None and training
    # continues; the later sess.run on it will presumably fail -- confirm
    # whether this should raise instead.
    train_step = None
    if graph_hyper_params['opt'] == 'adam':
        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adgrad':
        train_step = tf.train.AdagradOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    else:
        print 'No optimizer !'

    # ---- Preallocated training buffers ----
    # Each buffer has batch_size + neg_sample_size rows and is overwritten in
    # place for every batch.  tmp_u / tmp_u_val are all-zero rows used for
    # ids that have no entry in model_user_in.
    batch_u_id = np.zeros((batch_size + neg_sample_size, 1)).astype('int32')
    batch_u_interact_length = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('int32')
    batch_u = np.zeros(
        (batch_size + neg_sample_size, user_max_interact)).astype('int32')
    tmp_u = np.array([0] * user_max_interact).astype('int32')
    batch_u_val = np.zeros(
        (batch_size + neg_sample_size, 1, user_max_interact)).astype('float32')
    tmp_u_val = np.array([[0.0] * user_max_interact]).astype('float32')

    # Item-side counterparts of the buffers above.
    batch_v_id = np.zeros((batch_size + neg_sample_size, 1)).astype('int32')
    batch_v_interact_length = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('int32')
    batch_v = np.zeros(
        (batch_size + neg_sample_size, item_max_interact)).astype('int32')
    tmp_v = np.array([0] * item_max_interact).astype('int32')
    batch_v_val = np.zeros(
        (batch_size + neg_sample_size, 1, item_max_interact)).astype('float32')
    tmp_v_val = np.array([[0.0] * item_max_interact]).astype('float32')
    batch_true_u_v = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('float32')

    # ---- Preallocated evaluation buffers ----
    # 100 rows: one held-out item plus 99 randomly drawn items per user.
    batch_u_test_id = np.zeros((100, 1)).astype('int32')
    batch_u_test_interact_length = np.zeros((100, 1)).astype('int32')
    batch_u_test = np.zeros((100, user_max_interact)).astype('int32')
    batch_u_test_val = np.zeros((100, 1, user_max_interact)).astype('float32')

    batch_v_test_id = np.zeros((100, 1)).astype('int32')
    batch_v_test_interact_length = np.zeros((100, 1)).astype('int32')
    batch_v_test = np.zeros((100, item_max_interact)).astype('int32')
    batch_v_test_val = np.zeros((100, 1, item_max_interact)).astype('float32')

    # Keep only the most recent checkpoint on disk.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    # config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.7
    # sess = tf.Session(config=config)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        # Shuffles the shared rating list in place.
        np.random.shuffle(pruned_all_ratings)
        one_epoch_loss = one_epoch_batchnum = 0.0
        # Ratings beyond the last full batch are not visited in this epoch.
        for index in range(len(pruned_all_ratings) / batch_size):
            train_sample_index = 0
            # ---- Rows for the observed pairs of this batch (label 1.0) ----
            for u_i, v_i, r_i, t_i in pruned_all_ratings[index *
                                                         batch_size:(index +
                                                                     1) *
                                                         batch_size]:
                batch_u_id[train_sample_index], batch_v_id[
                    train_sample_index] = u_i, v_i
                batch_u_interact_length[train_sample_index][0] = model_user_in[
                    u_i]["l"]
                batch_v_interact_length[train_sample_index][0] = model_item_in[
                    v_i]["l"]

                # Assigning into the numpy rows copies the stored "k"/"v"
                # sequences, so the edits below touch only the batch buffers.
                batch_u[train_sample_index] = model_user_in[u_i]["k"]
                batch_u_val[train_sample_index][0] = model_user_in[u_i]["v"]

                batch_v[train_sample_index] = model_item_in[v_i]["k"]
                batch_v_val[train_sample_index][0] = model_item_in[v_i]["v"]
                batch_true_u_v[train_sample_index][0] = 1.0

                # Remove the current item v_i from the user's interaction row:
                # shrink the length by one, move the entry at the new last
                # position into v_i's slot, then zero that last position.
                # Skipped when the stored length is 1 or less.
                if model_user_in[u_i]["l"] > 1:
                    batch_u_interact_length[train_sample_index][0] -= 1
                    li = batch_u[train_sample_index].tolist()
                    ii_ind = li.index(v_i)
                    batch_u[train_sample_index][ii_ind] = li[
                        batch_u_interact_length[train_sample_index][0]]
                    batch_u_val[train_sample_index][0][ii_ind] = batch_u_val[
                        train_sample_index][0][
                            batch_u_interact_length[train_sample_index][0]]
                    batch_u[train_sample_index][
                        batch_u_interact_length[train_sample_index][0]] = 0
                    batch_u_val[train_sample_index][0][
                        batch_u_interact_length[train_sample_index][0]] = 0.0

                # Same removal on the item side: drop the current user u_i
                # from the item's interaction row.
                if model_item_in[v_i]["l"] > 1:
                    batch_v_interact_length[train_sample_index][0] -= 1
                    li = batch_v[train_sample_index].tolist()
                    ii_ind = li.index(u_i)
                    batch_v[train_sample_index][ii_ind] = li[
                        batch_v_interact_length[train_sample_index][0]]
                    batch_v_val[train_sample_index][0][ii_ind] = batch_v_val[
                        train_sample_index][0][
                            batch_v_interact_length[train_sample_index][0]]
                    batch_v[train_sample_index][
                        batch_v_interact_length[train_sample_index][0]] = 0
                    batch_v_val[train_sample_index][0][
                        batch_v_interact_length[train_sample_index][0]] = 0.0

                train_sample_index += 1

            # ---- Rows for the sampled pairs (label 0.0) ----
            # They are appended after the observed rows in the same buffers.
            for sam in range(neg_sample_size):
                u_i, v_i = my_sample.smple_one()
                batch_u_id[train_sample_index], batch_v_id[
                    train_sample_index] = u_i, v_i

                # Ids without an entry in model_user_in get an all-zero row
                # and an interaction length of 0.
                if u_i in model_user_in:
                    batch_u[train_sample_index] = model_user_in[u_i]["k"]
                    batch_u_val[train_sample_index][0] = model_user_in[u_i][
                        "v"]
                    batch_u_interact_length[train_sample_index][
                        0] = model_user_in[u_i]["l"]
                else:
                    batch_u[train_sample_index] = tmp_u
                    batch_u_val[train_sample_index][0] = tmp_u_val
                    batch_u_interact_length[train_sample_index][0] = 0

                if v_i in model_item_in:
                    batch_v[train_sample_index] = model_item_in[v_i]["k"]
                    batch_v_val[train_sample_index][0] = model_item_in[v_i][
                        "v"]
                    batch_v_interact_length[train_sample_index][
                        0] = model_item_in[v_i]["l"]
                else:
                    batch_v[train_sample_index] = tmp_v
                    batch_v_val[train_sample_index][0] = tmp_v_val
                    batch_v_interact_length[train_sample_index][0] = 0
                batch_true_u_v[train_sample_index][0] = 0.0
                train_sample_index += 1

            # One optimizer step on the combined batch.
            feed_train = {
                user_id: batch_u_id,
                item_id: batch_v_id,
                u_index: batch_u,
                u_interact_length: batch_u_interact_length,
                u_val: batch_u_val,
                v_index: batch_v,
                v_val: batch_v_val,
                v_interact_length: batch_v_interact_length,
                true_u_v: batch_true_u_v
            }
            _, loss_val, pred_value = sess.run(
                [train_step, model_loss, pred_val], feed_dict=feed_train)
            one_epoch_loss += loss_val
            one_epoch_batchnum += 1.0

            # Report and evaluate at regular intervals inside the epoch; the
            # interval is derived from the batch count and show_peroid.
            # NOTE(review): with integer division the modulus
            # (len(pruned_all_ratings) / batch_size - 1) / show_peroid can be
            # 0 when there are few batches, which would raise
            # ZeroDivisionError -- confirm the expected data sizes.
            if index != 0 and index % (
                (len(pruned_all_ratings) / batch_size - 1) / show_peroid) == 0:
                # print "epoch: ", epoch, " end"
                format_str = '%s epoch=%d in_epoch=%.2f avg_loss=%.4f'
                print(format_str % (datetime.now(), epoch, 1.0 * index /
                                    (len(pruned_all_ratings) / batch_size),
                                    one_epoch_loss / one_epoch_batchnum))
                # Restart the running average for the next reporting window.
                one_epoch_loss = one_epoch_batchnum = 0.0

                # Compute NDCG@10 and HR@10
                # evaluate_1
                # evaluate_2
                test_hr_list, test_ndcg_list = [], []
                for u_i in latest_item_interaction:
                    v_latest = latest_item_interaction[u_i]

                    # print u_i, v_latest
                    # Candidate list: the held-out item first, then 99 random
                    # item ids drawn from 1 .. v_max_num - 1 that the user has
                    # not interacted with, are not already chosen, and appear
                    # in pruned_item_map_user.
                    v_random = [v_latest]
                    i = 1
                    while i < 100:
                        rand_num = int(np.random.rand() * (v_max_num - 1) + 1)
                        if rand_num not in user_map_item[
                                u_i] and rand_num not in v_random and rand_num in pruned_item_map_user:
                            v_random.append(rand_num)
                            i += 1

                    # Fill the 100 evaluation rows: the same user in every
                    # row, paired with each candidate item in turn.
                    for train_sample_index in range(100):
                        if u_i in model_user_in:
                            batch_u_test[train_sample_index] = model_user_in[
                                u_i]["k"]
                            batch_u_test_val[train_sample_index][
                                0] = model_user_in[u_i]["v"]
                            batch_u_test_interact_length[train_sample_index][
                                0] = model_user_in[u_i]["l"]
                        else:
                            batch_u_test[train_sample_index] = tmp_u
                            batch_u_test_val[train_sample_index][0] = tmp_u_val
                            batch_u_test_interact_length[train_sample_index][
                                0] = 0

                        v_i = v_random[train_sample_index]
                        if v_i in model_item_in:
                            batch_v_test[train_sample_index] = model_item_in[
                                v_i]["k"]
                            batch_v_test_val[train_sample_index][
                                0] = model_item_in[v_i]["v"]
                            batch_v_test_interact_length[train_sample_index][
                                0] = model_item_in[v_i]["l"]
                        else:
                            batch_v_test[train_sample_index] = tmp_v
                            batch_v_test_val[train_sample_index][0] = tmp_v_val
                            batch_v_test_interact_length[train_sample_index][
                                0] = 0

                        batch_u_test_id[train_sample_index], batch_v_test_id[
                            train_sample_index] = u_i, v_i

                    # true_u_v is not fed here.
                    # NOTE(review): this assumes pred_val does not depend on
                    # the true_u_v placeholder -- confirm in inference_model.
                    feed_test = {
                        user_id: batch_u_test_id,
                        u_index: batch_u_test,
                        u_val: batch_u_test_val,
                        u_interact_length: batch_u_test_interact_length,
                        item_id: batch_v_test_id,
                        v_index: batch_v_test,
                        v_val: batch_v_test_val,
                        v_interact_length: batch_v_test_interact_length
                    }
                    pred_value = sess.run([pred_val], feed_dict=feed_test)
                    # Flatten the predictions to one score per candidate row.
                    pre_real_val = np.array(pred_value).reshape((-1))

                    items = v_random
                    # The held-out item was placed first in the candidates.
                    gtItem = items[0]
                    # Get prediction scores
                    map_item_score = {}
                    for i in xrange(len(items)):
                        item = items[i]
                        map_item_score[item] = pre_real_val[i]

                    # Evaluate top rank list
                    # print map_item_score
                    ranklist = heapq.nlargest(top_k,
                                              map_item_score,
                                              key=map_item_score.get)
                    test_hr_list.append(getHitRatio(ranklist, gtItem))
                    test_ndcg_list.append(getNDCG(ranklist, gtItem))

                # Average the per-user metrics.  A result counts as better
                # when HR is higher, or HR ties and NDCG is higher.
                hr_val, ndcg_val = np.array(test_hr_list).mean(), np.array(
                    test_ndcg_list).mean()
                if hr_val > best_hr or (hr_val == best_hr
                                        and ndcg_val > best_ndcg):
                    best_hr, best_ndcg = hr_val, ndcg_val
                    if epoch > 10:  # only save the model after 10 epochs
                        current_step = tf.train.global_step(sess, global_step)
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("saved model to: %s" % path)

                print("result: hr=%.4f ndcg=%.4f best_hr=%.4f best_ndcg=%.4f" %
                      (hr_val, ndcg_val, best_hr, best_ndcg))