# Tail of a training-script __main__ block: the `handlers` list literal is
# opened before this excerpt begins (presumably a FileHandler + StreamHandler
# pair — TODO confirm against the full file).
]
# Route all log records (DEBUG and up) through the handlers built above.
logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', level=logging.DEBUG, datefmt='%d-%m-%Y %H:%M:%S', handlers=handlers)
logger.info(args)
logger.info('Start training!')
# FEVER-style 3-way claim-verification labels.
label_map = {'SUPPORTS': 0, 'REFUTES': 1, 'NOT ENOUGH INFO': 2}
# Cased tokenizer (do_lower_case=False) from the local BERT checkpoint dir.
tokenizer = BertTokenizer.from_pretrained(args.bert_pretrain, do_lower_case=False)
logger.info("loading training set")
trainset_reader = DataLoader(args.train_path, label_map, tokenizer, args, batch_size=args.train_batch_size)
logger.info("loading validation set")
# test=True: validation reader runs in evaluation mode (exact semantics
# defined by the project DataLoader — not visible here).
validset_reader = DataLoader(args.valid_path, label_map, tokenizer, args, batch_size=args.valid_batch_size, test=True)
logger.info('initializing estimator model')
bert_model = BertForSequenceEncoder.from_pretrained(args.bert_pretrain)
ori_model = inference_model(bert_model, args)
# Keep a handle on the unwrapped model (ori_model) for checkpointing while
# training through the DataParallel wrapper.
model = nn.DataParallel(ori_model)
model = model.cuda()
train_model(model, ori_model, args, trainset_reader, validset_reader)
        # Continuation of create_iterator: this predicate is the trailing
        # argument of a dataset.filter(...) call opened before this excerpt —
        # it keeps only full batches (first dim of `y` exactly batch_size).
        lambda t, y, s: tf.equal(tf.shape(y)[0], batch_size))
    iterator = dataset.make_initializable_iterator()
    return iterator


# ---- script body: build graph, restore checkpoint, train ----
batch_size = 32
num_epochs = 200
restore = True  # warm-start from the newest checkpoint in ../models
# Input-file path is fed at session run time via this scalar placeholder.
filename = tf.placeholder(tf.string, shape=[])
iterator = create_iterator(filename, batch_size)
length, token, label = iterator.get_next()
output = train_model(token, label, length, batch_size)
infer_output = inference_model(token, label, length, batch_size)
# Per-token prediction: argmax over the last (class) axis of [batch, time, classes].
pred = tf.argmax(output, axis=2)
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=output)
cost = tf.reduce_mean(loss)
updates = tf.train.AdamOptimizer(1e-4).minimize(cost)
sess = tf.Session()
saver = tf.train.Saver()
# NOTE(review): variables are initialized first and then (if restore) fully
# overwritten by the checkpoint restore — init only matters on a cold start.
sess.run(tf.global_variables_initializer())
if restore == True:
    saver.restore(sess, tf.train.latest_checkpoint('../models'))
for i in range(num_epochs):  # epoch loop body continues past this excerpt
# Training-script __main__ body: configure logging, build tokenizer/model,
# and hand off to train(). `args`, `logger` and `device` are defined earlier
# in the file (not visible in this excerpt).
handlers = [
    # Mirror the log to <outdir>/train_log.txt and to the console.
    logging.FileHandler(os.path.abspath(args.outdir) + '/train_log.txt'),
    logging.StreamHandler()
]
logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', level=logging.DEBUG, datefmt='%d-%m-%Y %H:%M:%S', handlers=handlers)
logger.info(args)
logger.info('Start training!')
# Cased tokenizer from the local BERT checkpoint directory.
tokenizer = BertTokenizer.from_pretrained(args.bert_pretrain, do_lower_case=False)
logger.info("loading training set")
reader = DataLoader(args.train_path, tokenizer, args, batch_size=args.train_batch_size)
logger.info("loading validation set")
valid_reader = DataLoader(args.valid_path, tokenizer, args, batch_size=args.valid_batch_size)
# Encoder and task head are both moved to `device` explicitly.
bert_model = BertForSequenceEncoder.from_pretrained(args.bert_pretrain)
bert_model = bert_model.to(device)
model = inference_model(bert_model, args, device)
model = model.to(device)
train(args, model, reader, valid_reader)
# Evaluation-script __main__ body: parse CLI args, load the fine-tuned
# checkpoint, and score the validation set with eval_model().
# `parser`, `logger`, AutoTokenizer/AutoModel, DataLoaderTest, inference_model
# and eval_model come from earlier in the file (not visible in this excerpt).
args = parser.parse_args()
# Create the output directory if needed (single level; parent must exist).
if not os.path.exists(args.outdir):
    os.mkdir(args.outdir)
# NOTE(review): args.cuda is computed but the model is moved to GPU
# unconditionally below — confirm intended behavior on CPU-only hosts.
args.cuda = not args.no_cuda and torch.cuda.is_available()
logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s', level=logging.DEBUG, datefmt='%d-%m-%Y %H:%M:%S')
logger.info(args)
logger.info('Start testing!')
# SciFact-style 3-way label scheme; label_list keeps the id -> name order.
label_map = {'SUPPORT': 0, 'CONTRADICT': 1, 'NOT_ENOUGH_INFO': 2}
label_list = ['SUPPORT', 'CONTRADICT', 'NOT_ENOUGH_INFO']
args.num_labels = len(label_map)
tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
logger.info("loading validation set")
validset_reader = DataLoaderTest(args, label_map, tokenizer, batch_size=args.batch_size)
logger.info('initializing estimator model')
# Fix: the original called .cuda() twice on the encoder (once inline, once on
# the next line); a single call is sufficient and behavior-identical.
bert = AutoModel.from_pretrained(args.pretrain).cuda()
bert.eval()  # encoder is frozen in inference mode for evaluation
model = inference_model(bert, args)
# Restore the fine-tuned head/encoder weights saved under the 'model' key.
model.load_state_dict(torch.load(args.checkpoint)['model'])
model = model.cuda()
model.eval()
eval_model(model, label_list, validset_reader, args.outdir, args.name)
def train_matrix_factorization_With_Feed_Neural():
    """Train the feed-neural matrix-factorization model and evaluate HR@10 / NDCG@10.

    Python 2 / TensorFlow 1.x code (print statement, xrange, integer `/`).
    Relies on module-level globals defined elsewhere in the file:
    pruned_all_ratings, model_user_in, model_item_in, u_max_num, v_max_num,
    user_max_interact, item_max_interact, batch_size, neg_sample_size, epochs,
    learn_rate, show_peroid, graph_hyper_params, model_name, first_layer_size,
    last_layer_size, latest_item_interaction, user_map_item,
    pruned_item_map_user, checkpoint_prefix, MySampler, inference_model,
    getHitRatio, getNDCG.
    """
    top_k = 10  # rank cutoff for HR@K / NDCG@K
    best_hr = best_ndcg = 0.0
    # Negative sampler over the (user, item) space.
    my_sample = MySampler(pruned_all_ratings, u_max_num, v_max_num)
    # ---- graph inputs ----
    user_id = tf.placeholder(tf.int32, [None, 1], name="user_id")
    u_index = tf.placeholder(tf.int32, [None, user_max_interact], name="u_index")
    u_val = tf.placeholder(tf.float32, [None, 1, user_max_interact], name="u_val")
    u_interact_length = tf.placeholder(tf.int32, [None, 1], name="u_interact_length")
    item_id = tf.placeholder(tf.int32, [None, 1], name="item_id")
    v_index = tf.placeholder(tf.int32, [None, item_max_interact], name="v_index")
    v_val = tf.placeholder(tf.float32, [None, 1, item_max_interact], name="v_val")
    # NOTE(review): graph name duplicates "u_interact_length" above — almost
    # certainly meant to be "v_interact_length" (TF will uniquify it to
    # "u_interact_length_1", which breaks name-based graph lookups).
    v_interact_length = tf.placeholder(tf.int32, [None, 1], name="u_interact_length")
    true_u_v = tf.placeholder(tf.float32, [None, 1], name="true_u_v")
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Build prediction, loss and parameter list for the selected model.
    pred_val, model_loss, network_params = inference_model(
        model_name, user_id, u_index, u_val, u_interact_length, item_id,
        v_index, v_val, v_interact_length, v_max_num, u_max_num,
        first_layer_size, last_layer_size, user_max_interact,
        item_max_interact, true_u_v, graph_hyper_params)
    # ---- optimizer selection (note: key is spelled 'adgrad' in the config) ----
    train_step = None
    if graph_hyper_params['opt'] == 'adam':
        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adgrad':
        train_step = tf.train.AdagradOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    elif graph_hyper_params['opt'] == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(learn_rate).minimize(
            model_loss, global_step=global_step)
    else:
        # NOTE(review): train_step stays None here; the sess.run below would
        # fail if an unknown optimizer key is configured.
        print 'No optimizer !'
    # ---- training batch buffers (positives + appended negatives) ----
    batch_u_id = np.zeros((batch_size + neg_sample_size, 1)).astype('int32')
    batch_u_interact_length = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('int32')
    batch_u = np.zeros(
        (batch_size + neg_sample_size, user_max_interact)).astype('int32')
    tmp_u = np.array([0] * user_max_interact).astype('int32')  # all-zero history for unseen users
    batch_u_val = np.zeros(
        (batch_size + neg_sample_size, 1, user_max_interact)).astype('float32')
    tmp_u_val = np.array([[0.0] * user_max_interact]).astype('float32')
    batch_v_id = np.zeros((batch_size + neg_sample_size, 1)).astype('int32')
    batch_v_interact_length = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('int32')
    batch_v = np.zeros(
        (batch_size + neg_sample_size, item_max_interact)).astype('int32')
    tmp_v = np.array([0] * item_max_interact).astype('int32')  # all-zero history for unseen items
    batch_v_val = np.zeros(
        (batch_size + neg_sample_size, 1, item_max_interact)).astype('float32')
    tmp_v_val = np.array([[0.0] * item_max_interact]).astype('float32')
    batch_true_u_v = np.zeros(
        (batch_size + neg_sample_size, 1)).astype('float32')
    # ---- evaluation buffers: 1 ground-truth item + 99 sampled negatives ----
    batch_u_test_id = np.zeros((100, 1)).astype('int32')
    batch_u_test_interact_length = np.zeros((100, 1)).astype('int32')
    batch_u_test = np.zeros((100, user_max_interact)).astype('int32')
    batch_u_test_val = np.zeros((100, 1, user_max_interact)).astype('float32')
    batch_v_test_id = np.zeros((100, 1)).astype('int32')
    batch_v_test_interact_length = np.zeros((100, 1)).astype('int32')
    batch_v_test = np.zeros((100, item_max_interact)).astype('int32')
    batch_v_test_val = np.zeros((100, 1, item_max_interact)).astype('float32')
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    # config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.7
    # sess = tf.Session(config=config)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        # In-place shuffle of the training triples each epoch.
        np.random.shuffle(pruned_all_ratings)
        one_epoch_loss = one_epoch_batchnum = 0.0
        # Python 2 integer division: incomplete trailing batch is dropped.
        for index in range(len(pruned_all_ratings) / batch_size):
            train_sample_index = 0
            # ---- positive samples for this batch ----
            for u_i, v_i, r_i, t_i in \
                    pruned_all_ratings[index * batch_size:(index + 1) * batch_size]:
                batch_u_id[train_sample_index], batch_v_id[
                    train_sample_index] = u_i, v_i
                batch_u_interact_length[train_sample_index][0] = model_user_in[
                    u_i]["l"]
                batch_v_interact_length[train_sample_index][0] = model_item_in[
                    v_i]["l"]
                batch_u[train_sample_index] = model_user_in[u_i]["k"]
                batch_u_val[train_sample_index][0] = model_user_in[u_i]["v"]
                batch_v[train_sample_index] = model_item_in[v_i]["k"]
                batch_v_val[train_sample_index][0] = model_item_in[v_i]["v"]
                batch_true_u_v[train_sample_index][0] = 1.0  # positive label
                # Leave-one-out: remove the current item v_i from the user's
                # history (swap it with the last valid slot, zero that slot,
                # shrink the length) so the model cannot see the label.
                if model_user_in[u_i]["l"] > 1:
                    batch_u_interact_length[train_sample_index][0] -= 1
                    li = batch_u[train_sample_index].tolist()
                    ii_ind = li.index(v_i)
                    batch_u[train_sample_index][ii_ind] = li[
                        batch_u_interact_length[train_sample_index][0]]
                    batch_u_val[train_sample_index][0][ii_ind] = batch_u_val[
                        train_sample_index][0][
                            batch_u_interact_length[train_sample_index][0]]
                    batch_u[train_sample_index][
                        batch_u_interact_length[train_sample_index][0]] = 0
                    batch_u_val[train_sample_index][0][
                        batch_u_interact_length[train_sample_index][0]] = 0.0
                # Symmetric removal of the current user u_i from the item's history.
                if model_item_in[v_i]["l"] > 1:
                    batch_v_interact_length[train_sample_index][0] -= 1
                    li = batch_v[train_sample_index].tolist()
                    ii_ind = li.index(u_i)
                    batch_v[train_sample_index][ii_ind] = li[
                        batch_v_interact_length[train_sample_index][0]]
                    batch_v_val[train_sample_index][0][ii_ind] = batch_v_val[
                        train_sample_index][0][
                            batch_v_interact_length[train_sample_index][0]]
                    batch_v[train_sample_index][
                        batch_v_interact_length[train_sample_index][0]] = 0
                    batch_v_val[train_sample_index][0][
                        batch_v_interact_length[train_sample_index][0]] = 0.0
                train_sample_index += 1
            # ---- negative samples appended after the positives ----
            for sam in range(neg_sample_size):
                # NOTE(review): method name is spelled `smple_one` in the
                # sampler class — presumably "sample_one".
                u_i, v_i = my_sample.smple_one()
                batch_u_id[train_sample_index], batch_v_id[
                    train_sample_index] = u_i, v_i
                if u_i in model_user_in:
                    batch_u[train_sample_index] = model_user_in[u_i]["k"]
                    batch_u_val[train_sample_index][0] = model_user_in[u_i][
                        "v"]
                    batch_u_interact_length[train_sample_index][
                        0] = model_user_in[u_i]["l"]
                else:
                    # Unseen user: zero history.
                    batch_u[train_sample_index] = tmp_u
                    batch_u_val[train_sample_index][0] = tmp_u_val
                    batch_u_interact_length[train_sample_index][0] = 0
                if v_i in model_item_in:
                    batch_v[train_sample_index] = model_item_in[v_i]["k"]
                    batch_v_val[train_sample_index][0] = model_item_in[v_i][
                        "v"]
                    batch_v_interact_length[train_sample_index][
                        0] = model_item_in[v_i]["l"]
                else:
                    # Unseen item: zero history.
                    batch_v[train_sample_index] = tmp_v
                    batch_v_val[train_sample_index][0] = tmp_v_val
                    batch_v_interact_length[train_sample_index][0] = 0
                batch_true_u_v[train_sample_index][0] = 0.0  # negative label
                train_sample_index += 1
            feed_train = {
                user_id: batch_u_id,
                item_id: batch_v_id,
                u_index: batch_u,
                u_interact_length: batch_u_interact_length,
                u_val: batch_u_val,
                v_index: batch_v,
                v_val: batch_v_val,
                v_interact_length: batch_v_interact_length,
                true_u_v: batch_true_u_v
            }
            _, loss_val, pred_value = sess.run(
                [train_step, model_loss, pred_val], feed_dict=feed_train)
            one_epoch_loss += loss_val
            one_epoch_batchnum += 1.0
            # Periodic progress report + evaluation, `show_peroid` [sic] times
            # per epoch (integer division throughout — Python 2 semantics).
            if index != 0 and index % (
                    (len(pruned_all_ratings) / batch_size - 1) / show_peroid) == 0:
                # print "epoch: ", epoch, " end"
                format_str = '%s epoch=%d in_epoch=%.2f avg_loss=%.4f'
                print(format_str %
                      (datetime.now(), epoch,
                       1.0 * index / (len(pruned_all_ratings) / batch_size),
                       one_epoch_loss / one_epoch_batchnum))
                one_epoch_loss = one_epoch_batchnum = 0.0
                # Compute NDCG@10 and HR@10.
                # evaluate_1
                # evaluate_2
                test_hr_list, test_ndcg_list = [], []
                for u_i in latest_item_interaction:
                    # Ground-truth item = the user's latest interaction.
                    v_latest = latest_item_interaction[u_i]
                    # print u_i, v_latest
                    v_random = [v_latest]
                    i = 1
                    # Draw 99 random negative items the user never interacted
                    # with (rejection sampling; no fixed iteration bound).
                    while i < 100:
                        rand_num = int(np.random.rand() * (v_max_num - 1) + 1)
                        if rand_num not in user_map_item[
                                u_i] and rand_num not in v_random and rand_num in pruned_item_map_user:
                            v_random.append(rand_num)
                            i += 1
                    # Fill the 100-row evaluation batch (reuses the name
                    # train_sample_index as a plain loop index here).
                    for train_sample_index in range(100):
                        if u_i in model_user_in:
                            batch_u_test[train_sample_index] = model_user_in[
                                u_i]["k"]
                            batch_u_test_val[train_sample_index][
                                0] = model_user_in[u_i]["v"]
                            batch_u_test_interact_length[train_sample_index][
                                0] = model_user_in[u_i]["l"]
                        else:
                            batch_u_test[train_sample_index] = tmp_u
                            batch_u_test_val[train_sample_index][0] = tmp_u_val
                            batch_u_test_interact_length[train_sample_index][
                                0] = 0
                        v_i = v_random[train_sample_index]
                        if v_i in model_item_in:
                            batch_v_test[train_sample_index] = model_item_in[
                                v_i]["k"]
                            batch_v_test_val[train_sample_index][
                                0] = model_item_in[v_i]["v"]
                            batch_v_test_interact_length[train_sample_index][
                                0] = model_item_in[v_i]["l"]
                        else:
                            batch_v_test[train_sample_index] = tmp_v
                            batch_v_test_val[train_sample_index][0] = tmp_v_val
                            batch_v_test_interact_length[train_sample_index][
                                0] = 0
                        batch_u_test_id[train_sample_index], batch_v_test_id[
                            train_sample_index] = u_i, v_i
                    feed_test = {
                        user_id: batch_u_test_id,
                        u_index: batch_u_test,
                        u_val: batch_u_test_val,
                        u_interact_length: batch_u_test_interact_length,
                        item_id: batch_v_test_id,
                        v_index: batch_v_test,
                        v_val: batch_v_test_val,
                        v_interact_length: batch_v_test_interact_length
                    }
                    pred_value = sess.run([pred_val], feed_dict=feed_test)
                    pre_real_val = np.array(pred_value).reshape((-1))
                    items = v_random
                    gtItem = items[0]  # ground truth sits at position 0
                    # Get prediction scores
                    map_item_score = {}
                    for i in xrange(len(items)):
                        item = items[i]
                        map_item_score[item] = pre_real_val[i]
                    # Evaluate top rank list
                    # print map_item_score
                    ranklist = heapq.nlargest(top_k,
                                              map_item_score,
                                              key=map_item_score.get)
                    test_hr_list.append(getHitRatio(ranklist, gtItem))
                    test_ndcg_list.append(getNDCG(ranklist, gtItem))
                hr_val, ndcg_val = np.array(test_hr_list).mean(), np.array(
                    test_ndcg_list).mean()
                # Track the best (HR, NDCG) pair; checkpoint on improvement.
                if hr_val > best_hr or (hr_val == best_hr and ndcg_val > best_ndcg):
                    best_hr, best_ndcg = hr_val, ndcg_val
                    if epoch > 10:  # only save the model after 10 epochs
                        current_step = tf.train.global_step(sess, global_step)
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                        print("saved model to: %s" % path)
                print("result: hr=%.4f ndcg=%.4f best_hr=%.4f best_ndcg=%.4f" %
                      (hr_val, ndcg_val, best_hr, best_ndcg))