def train():
    # Load the training data and build trainable examples.
    train_sor_data, train_mub_data = load_sentences(FLAGS.train_sor_path,
                                                    FLAGS.train_mub_path)
    # Split the training data into N batches.
    train_manager = BatchManager(train_sor_data, train_mub_data,
                                 FLAGS.batch_size)
    # GPU session options.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    # Load the FLAGS-based model configuration.
    config = config_model()
    logger = get_logger(config["logger_path"])
    # Vocabulary and number of batches per epoch.
    word2id, id2word = load_sor_vocab()
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model_and_embedding(sess, Model, FLAGS.model_path,
                                           config, True)
        logger.info("start training")
        loss = []
        with tf.device('/gpu:0'):
            for i in range(FLAGS.num_of_epoch):
                for batch in train_manager.iter_batch(shuffle=True):
                    step, batch_loss = model.run_step(sess, True, batch)
                    loss.append(batch_loss)
                    if step % FLAGS.steps_check == 0:
                        iteration = step // steps_per_epoch + 1
                        logger.info(
                            "iteration:{} step:{}/{}, chatbot loss:{:>9.6f}".format(
                                iteration, step % steps_per_epoch,
                                steps_per_epoch, np.mean(loss)))
                        loss = []
                if i % 10 == 0:
                    save_model(sess, model, FLAGS.model_path, logger)
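# All of the trainers in this file rely on a project-local BatchManager that
# exposes a `len_data` attribute and an `iter_batch(shuffle=...)` generator.
# A minimal sketch of that interface, assuming the (data, batch_size)
# constructor used by the later snippets -- the real class, its padding
# scheme, and its batch layout may differ:
import random


class BatchManager:
    def __init__(self, data, batch_size):
        # Bucket sentences by length so each padded batch wastes little space.
        data = sorted(data, key=len)
        self.batch_data = [
            data[i:i + batch_size] for i in range(0, len(data), batch_size)
        ]
        self.len_data = len(self.batch_data)  # number of batches per epoch

    def iter_batch(self, shuffle=False):
        if shuffle:
            random.shuffle(self.batch_data)
        for batch in self.batch_data:
            yield batch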
def train():
    train_sentences, dico, char_to_id, id_to_char = load_sentence(
        FLAGS.train_file)
    if not os.path.isfile(FLAGS.map_file):
        if FLAGS.pre_emb:
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico.copy(), FLAGS.emb_file)
        else:
            sentences, dico, char_to_id, id_to_char = load_sentence(
                FLAGS.train_file)
        print(train_sentences[0])
        with open(FLAGS.map_file, 'wb') as f:
            pickle.dump([char_to_id, id_to_char], f)
    else:
        with open(FLAGS.map_file, 'rb') as f:
            char_to_id, id_to_char = pickle.load(f)

    train_data, test_data, dev_data = prepare_dataset(train_sentences,
                                                      char_to_id)
    print(train_data[0])
    print(test_data[0])
    print(dev_data[0])
    print(len(train_data), len(dev_data), len(test_data))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    test_manager = BatchManager(test_data, 100)
    dev_manager = BatchManager(dev_data, 100)

    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id)
        save_config(config, FLAGS.config_file)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss = []
        best = 0
        # sess.graph.finalize()
        for i in range(50):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, loss:{:>9.6f}".format(
                        iteration, step % steps_per_epoch, steps_per_epoch,
                        np.mean(loss)))
                    loss = []
            Acc_result = evaluate(sess, model, "dev", dev_manager, logger)
            logger.info("Acc {}".format(Acc_result))
            logger.info("test")
            # precision, recall, f1_score = model.evaluete_(sess, test_manager)
            # logger.info("P, R, F: {}, {}, {}".format(precision, recall, f1_score))
            test_result = evaluate(sess, model, "test", test_manager, logger)
            if test_result > best:
                best = test_result
                save_model(sess, model, FLAGS.ckpt_path, logger)
def train(conf):
    train_sentences = load_sentences(conf.train_file, conf.zeros)
    dev_sentences = load_sentences(conf.dev_file, conf.zeros)
    test_sentences = load_sentences(conf.test_file, conf.zeros)

    dico_chars_train = char_mapping(train_sentences, conf.lower)[0]
    dico_chars, char_to_id, id_to_char = augment_with_pretrained(
        dico_chars_train.copy(), conf.emb_file,
        list(itertools.chain.from_iterable(
            [[w[0] for w in s] for s in test_sentences])))
    _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)

    # Prepare data: turn each sentence into lists of indices.
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 conf.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               conf.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                conf.lower)

    # Load pretrained word embeddings.
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(conf.emb_file, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == conf.embedding_dim + 1:
            all_word_embeds[s[0]] = np.array([float(x) for x in s[1:]])

    # Random init for characters without a pretrained vector.
    word_embeds_dict = np.random.uniform(
        -np.sqrt(0.06), np.sqrt(0.06),
        (len(char_to_id), conf.embedding_dim))
    for w in char_to_id:
        if w in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w.lower()]
    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

    train_manager = BatchManager(train_data, conf.batch_size)
    model = BiLSTM_CRF(conf, tag_to_id, char_to_id, word_embeds_dict)
    optimizer = torch.optim.SGD(model.parameters(), lr=conf.learning_rate,
                                weight_decay=1e-4)

    dev_f1_ = 0
    for epoch in range(1, conf.epochs + 1):
        print(f'train on epoch {epoch}')
        j = 1
        for batch in train_manager.iter_batch(shuffle=True):
            batch_loss = 0.0
            sentences = batch[1]
            tags = batch[-1]
            # Per-sentence SGD within the batch, in random order.
            for i, index in enumerate(np.random.permutation(len(sentences))):
                model.zero_grad()
                sentence_in = sentences[index]
                tags_in = tags[index]
                loss = model.neg_log_likelihood(sentence_in, tags_in)
                loss.backward()
                optimizer.step()
                batch_loss += loss.data
            print(f'[batch {j}, batch size: {conf.batch_size}] '
                  f'batch loss: {batch_loss}')
            j = j + 1
        print(f'Validating on [epoch {epoch}] dev dataset ...')
        dev_results = get_predictions(model, dev_data, id_to_tag)
        dev_f1 = evaluate_ner(dev_results, conf)
        if dev_f1 > dev_f1_:
            dev_f1_ = dev_f1  # track the best dev F1 so far
            torch.save(model, conf.model_file)
            print('save model success.')
        test_results = get_predictions(model, test_data, id_to_tag)
        test_f1 = evaluate_ner(test_results, conf)
        print(f'[epoch {epoch}] On test dataset f1: {test_f1:.3f}')
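# How a model like BiLSTM_CRF above might consume the `word_embeds_dict`
# matrix -- a sketch only; the real class is project-local, so the layer
# names and sizes here are assumptions:
import numpy as np
import torch
import torch.nn as nn


class EmbeddingEncoder(nn.Module):
    def __init__(self, pretrained: np.ndarray, hidden_dim: int = 128):
        super().__init__()
        # Copy the (vocab_size, embedding_dim) matrix into a trainable layer.
        weights = torch.tensor(pretrained, dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(weights, freeze=False)
        self.lstm = nn.LSTM(weights.size(1), hidden_dim // 2,
                            bidirectional=True, batch_first=True)

    def forward(self, char_ids: torch.Tensor) -> torch.Tensor:
        # char_ids: (batch, seq_len) -> (batch, seq_len, hidden_dim)
        return self.lstm(self.embedding(char_ids))[0]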
def create_input(data, parameters, add_label, singletons=None):
    # Reconstructed header: `words` and `chars` come from the per-sentence
    # `data` dict, matching how the rest of the function reads it.
    words = data['words']
    chars = data['chars']
    # During training, randomly replace singleton words with the unknown id.
    if singletons is not None:
        words = insert_singletons(words, singletons)
    if parameters['cap_dim']:
        caps = data['caps']
    char_for, char_rev, char_pos = pad_word_chars(chars)
    input = []
    if parameters['word_dim']:
        input.append(words)
    if parameters['char_dim']:
        input.append(char_for)
        if parameters['char_bidirect']:
            input.append(char_rev)
        input.append(char_pos)
    if parameters['cap_dim']:
        input.append(caps)
    if add_label:
        input.append(data['tags'])
    return input


if __name__ == "__main__":
    train_sentences = load_sentences("./data/input.train", True)
    print(train_sentences)
    # Create maps if they do not exist.
    _c, char_to_id, id_to_char = char_mapping(train_sentences, True)
    _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id, True)
    train_manager = BatchManager(train_data, 100)
    for batch in train_manager.iter_batch(shuffle=True):
        print(batch[0])
        print(batch[-1])
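# A hypothetical per-sentence `data` dict in the shape `create_input` above
# expects (field names follow the code; the id values are toy examples):
data = {
    'words': [4, 17, 9],                # word ids for one sentence
    'chars': [[2, 5], [7], [1, 3, 8]],  # char ids per word
    'caps': [0, 1, 0],                  # capitalization features
    'tags': [0, 3, 4],                  # gold tag ids
}
# e.g. with word_dim and char_dim enabled, `input` would be
# [words, char_for, char_rev, char_pos] (plus caps/tags when enabled).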
def main(_):
    if not os.path.isdir(FLAGS.log_path):
        os.makedirs(FLAGS.log_path)
    if not os.path.isdir(FLAGS.model_path):
        os.makedirs(FLAGS.model_path)
    if not os.path.isdir(FLAGS.result_path):
        os.makedirs(FLAGS.result_path)

    tag_to_id = {
        "O": 0,
        "B-LOC": 1, "I-LOC": 2,
        "B-PER": 3, "I-PER": 4,
        "B-ORG": 5, "I-ORG": 6,
    }

    # load data
    id_to_word, id_to_tag, train_data, dev_data, test_data = load_data(
        FLAGS, tag_to_id)
    train_manager = BatchManager(train_data, len(id_to_tag),
                                 FLAGS.word_max_len, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, len(id_to_tag), FLAGS.word_max_len,
                               FLAGS.valid_batch_size)
    test_manager = BatchManager(test_data, len(id_to_tag), FLAGS.word_max_len,
                                FLAGS.valid_batch_size)

    with tf.Session() as sess:
        model = create_model(sess, id_to_word, id_to_tag)
        loss = 0
        best_test_f1 = 0
        steps_per_epoch = len(train_data) // FLAGS.batch_size + 1
        for _ in range(FLAGS.max_epoch):
            iteration = model.global_step.eval() // steps_per_epoch + 1
            train_manager.shuffle()
            for batch in train_manager.iter_batch():
                global_step = model.global_step.eval()
                step = global_step % steps_per_epoch
                batch_loss = model.run_step(sess, True, batch)
                loss += batch_loss / FLAGS.steps_per_checkpoint
                if global_step % FLAGS.steps_per_checkpoint == 0:
                    model.logger.info(
                        "iteration:{} step:{}/{}, NER loss:{:>9.6f}".format(
                            iteration, step, steps_per_epoch, loss))
                    loss = 0
            model.logger.info("validating ner")
            ner_results = model.predict(sess, dev_manager)
            eval_lines = test_ner(ner_results, FLAGS.result_path)
            for line in eval_lines:
                model.logger.info(line)
            test_f1 = float(eval_lines[1].strip().split()[-1])
            if test_f1 > best_test_f1:
                best_test_f1 = test_f1
                model.logger.info("new best f1 score:{:>.3f}".format(test_f1))
                model.logger.info("saving model ...")
                checkpoint_path = os.path.join(FLAGS.model_path,
                                               "translate.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)

        # test model
        model.logger.info("testing ner")
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_path)
        model.logger.info("Reading model parameters from %s"
                          % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        ner_results = model.predict(sess, test_manager)
        eval_lines = test_ner(ner_results, FLAGS.result_path)
        for line in eval_lines:
            model.logger.info(line)
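# The `eval_lines[1].strip().split()[-1]` F1 extraction above assumes
# conlleval-style report lines, where the second line ends in the overall
# FB1 score. A hypothetical report with toy numbers, parsed the same way:
eval_lines = [
    "processed 10000 tokens with 500 phrases; found: 480 phrases; correct: 450.",
    "accuracy:  97.50%; precision:  93.75%; recall:  90.00%; FB1:  91.84",
]
test_f1 = float(eval_lines[1].strip().split()[-1])
print(test_f1)  # 91.84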