def calc_score():
    seed_everything()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Seq2seq().to(device)
    model.load(
        torch.load('{}/{}_{}.pth'.format(OUTPUT_DIR, FN, CKPT_NUM))['model'])
    sp = spm.SentencePieceProcessor()
    sp.Load(SP_PATH)

    test_data_txt = open(TEST_DATA_TXT_PATH, 'r', encoding='utf8')
    with open(TEST_DATA_PKL_PATH, 'rb') as f:
        test_data_pkl = pickle.load(f)
    dataset = DialogDataset(test_data_pkl)
    data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    if LOSS == 'SCE':
        criterion = SCELoss()
    elif LOSS == 'ITF':
        criterion = ITFLoss(device, _lambda=LAMBDA)
    else:
        criterion = INFLoss(device, _lambda=LAMBDA)

    count = 0
    test_ref, test_hyp = [], []
    for line in test_data_txt:
        count += 1
        if count % 2 != 0:
            if LANGUAGE == 'JP':
                test_ref.append(
                    sp.EncodeAsPieces(
                        evaluate(line, sp, model, device).replace('▁', '')))
            else:
                test_ref.append(
                    sp.EncodeAsPieces(
                        evaluate(line, sp, model, device).replace('▁', ' ')))
        else:
            if LANGUAGE == 'JP':
                test_hyp.append(sp.EncodeAsPieces(line.replace('▁', '')))
            else:
                test_hyp.append(sp.EncodeAsPieces(line.replace('▁', ' ')))

    print("---------- RESULTS ---------")
    test_ppl = get_perplexity(model, criterion, data_loader, device)
    print("PPL: {}".format(test_ppl))
    test_bleu_1 = get_bleu_score(test_ref, test_hyp, 1) * 100
    test_bleu_2 = get_bleu_score(test_ref, test_hyp, 2) * 100
    print("BLEU-1:{}, 2:{}".format(test_bleu_1, test_bleu_2))
    test_rouge_1 = get_rouge_score(test_ref, test_hyp, 1) * 100
    test_rouge_2 = get_rouge_score(test_ref, test_hyp, 2) * 100
    print("ROUGE-1:{}, 2:{}".format(test_rouge_1, test_rouge_2))
    test_dist_1 = get_dist_n(test_ref, 1) * 100
    test_dist_2 = get_dist_n(test_ref, 2) * 100
    print("DIST-1:{}, 2:{}".format(test_dist_1, test_dist_2))
    test_len = get_length(test_ref) / count
    print("LENGTH:{}".format(test_len))
def __init__(self, model_path, output_dir):
    self.logger = logging.getLogger('paragraph-level')
    self.output_dir = output_dir
    self.test_data = open(config.test_trg_file, "r").readlines()
    self.data_loader = get_loader(config.test_src_file,
                                  config.test_trg_file,
                                  config.test_ans_file,
                                  batch_size=1,
                                  use_tag=False,
                                  shuffle=False)

    self.tokenizer = BertTokenizer.from_pretrained(r'MTBERT/vocab.txt')
    self.model_config = BertConfig.from_pretrained('MTBERT')
    self.model = Seq2seq()
    if config.use_gpu:
        state_dict = torch.load(model_path, map_location=config.device)
    else:
        state_dict = torch.load(model_path, map_location='cpu')
    self.model.load_state_dict(state_dict)
    self.model.eval()
    if config.use_gpu:
        self.model = self.model.to(config.device)

    self.pred_dir = 'result/pointer_maxout_ans/generated.txt'
    self.golden_dir = 'result/pointer_maxout_ans/golden.txt'
    self.src_file = 'result/pointer_maxout_ans/src.txt'
    # dummy file for evaluation
    with open(self.src_file, "w") as f:
        for i in range(len(self.data_loader)):
            f.write(str(i) + "\n")
def __init__(self, model_path, output_dir):
    with open(config.word2idx_file, "rb") as f:
        word2idx = pickle.load(f)

    self.output_dir = output_dir
    self.test_data = open(config.test_trg_file, "r").readlines()
    self.data_loader = get_loader(config.test_src_file,
                                  config.test_trg_file,
                                  word2idx,
                                  batch_size=1,
                                  use_tag=True,
                                  shuffle=False)

    self.tok2idx = word2idx
    self.idx2tok = {idx: tok for tok, idx in self.tok2idx.items()}
    self.model = Seq2seq()
    state_dict = torch.load(model_path)
    self.model.load_state_dict(state_dict)
    self.model.eval()
    self.model = self.model.to(config.device)

    self.pred_dir = os.path.join(output_dir, "generated.txt")
    self.golden_dir = os.path.join(output_dir, "golden.txt")
    self.src_file = os.path.join(output_dir, "src.txt")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # dummy file for evaluation
    with open(self.src_file, "w") as f:
        for i in range(len(self.data_loader)):
            f.write(str(i) + "\n")
def __init__(self, model_path=None):
    # load dictionary and embedding file
    with open(config.embedding, "rb") as f:
        embedding = pickle.load(f)
        embedding = torch.Tensor(embedding).to(config.device)
    with open(config.word2idx_file, "rb") as f:
        word2idx = pickle.load(f)

    # train, dev loader
    print("load train data")
    self.train_loader = get_loader(config.train_src_file,
                                   config.train_trg_file,
                                   word2idx,
                                   use_tag=config.use_tag,
                                   batch_size=config.batch_size,
                                   debug=config.debug)
    self.dev_loader = get_loader(config.dev_src_file,
                                 config.dev_trg_file,
                                 word2idx,
                                 use_tag=config.use_tag,
                                 batch_size=128,
                                 debug=config.debug)

    train_dir = os.path.join("./save", "seq2seq")
    self.model_dir = os.path.join(
        train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    self.model = Seq2seq(embedding, config.use_tag, model_path=model_path)
    params = list(self.model.encoder.parameters()) \
        + list(self.model.decoder.parameters())

    self.lr = config.lr
    self.optim = optim.SGD(params, self.lr, momentum=0.8)
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
def create_model(args):
    trim_min_count = 5
    data_loader = DataLoader(args, trim_min_count=trim_min_count)

    embed_model = nn.Embedding(data_loader.vocab_len, args.embed)
    embed_model.weight.data.copy_(data_loader.embed_vectors)
    encode_model = Encoder(
        embed_model=embed_model,
        hidden_size=args.hidden,
        span_size=data_loader.span_size,
        dropout=args.dropout,
    )
    decode_model = Decoder(
        embed_model=embed_model,
        op_set=data_loader.op_set,
        vocab_dict=data_loader.vocab_dict,
        class_list=data_loader.class_list,
        hidden_size=args.hidden,
        dropout=args.dropout,
        use_cuda=args.use_cuda
    )
    seq2seq = Seq2seq(encode_model, decode_model)
    return seq2seq, data_loader
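# Hypothetical usage sketch for create_model() above: build an argparse
# namespace exposing the fields the function reads (`embed`, `hidden`,
# `dropout`, `use_cuda`) and unpack the returned (model, loader) pair.
# The default values here are illustrative assumptions, not values from the
# original project, and the custom DataLoader may require further args fields
# (e.g. data paths) that are not shown in this snippet.
import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--embed', type=int, default=128)      # embedding size
parser.add_argument('--hidden', type=int, default=512)     # encoder/decoder hidden size
parser.add_argument('--dropout', type=float, default=0.4)
parser.add_argument('--use_cuda', action='store_true',
                    default=torch.cuda.is_available())
args = parser.parse_args()

seq2seq, data_loader = create_model(args)
if args.use_cuda:
    seq2seq = seq2seq.cuda()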
def predict(self, seq2seq):
    predicts = []
    ids = []
    sentences = []
    lengths = []
    seq2seq = Seq2seq(self.config, device=self.device, load_emb=True)
    # gold = []

    data = prepare.load_data(self.mode)
    if self.mode == 'test':
        data = prepare.test_process(data)
    else:
        data = prepare.process(data)
    data = data_prepare.Data(data, self.config.batch_size, self.config)

    for batch_i in range(data.batch_number):
        batch_data = data.next_batch(is_random=False)
        pred_action_list, pred_logits_list = self.test_step(batch_data, seq2seq)
        pred_action_list = pred_action_list.cpu().numpy()
        sentences.extend(batch_data.sentence_fw)
        predicts.extend([
            pred_action_list[:, i] for i in range(pred_action_list.shape[1])
        ])
        # print(len(predicts))
        ids.extend(batch_data.standard_outputs)
        lengths.extend(batch_data.input_sentence_length)

    evaluation.get_result(ids, sentences, lengths, predicts, self.config)
    # gold.extend(batch_data.all_triples)
    # (r_f1, r_precision, r_recall), (e_f1, e_precision, e_recall) = evaluation.rel_entity_compare(predicts, gold, self.config)
    data.reset()
def train(epoch_num):
    args.train = True
    model = Seq2seq(args, ch_words_dict)
    batch_index, min_loss = 0, 100
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        for epoch in range(epoch_num):
            train_generator = batch_genetator(mode='train')
            try:
                while True:
                    sou_sentences_list, sou_length_list, tag_sentences_list, tag_length_list = next(train_generator)
                    print('sentence length: {}'.format(len(sou_sentences_list[0])))
                    if len(sou_sentences_list[0]) > 90:
                        continue
                    feed_dict = {model.sequence_input: sou_sentences_list,
                                 model.sequence_length: sou_length_list,
                                 model.target_input: tag_sentences_list,
                                 model.target_length: tag_length_list}
                    loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
                    print('epoch: {}, batch index: {}, loss: {}, current min loss: {}'.format(
                        epoch, batch_index, loss, min_loss))
                    if loss < min_loss:
                        min_loss = loss
                        print('save at epoch: {}, batch {} the loss is {}'.format(
                            epoch, batch_index, min_loss))
                        saver.save(sess, '../model/model.ckpt')
                    batch_index = batch_index + 1
            except StopIteration:
                print('finish training')
def __init__(self):
    logging.info("load data......")
    self.data = datasets.Lang8v1()
    self.data.process()
    self.data.show()

    self.config = Config()
    self.config.source_vocab_size = self.data.src_vocab_size
    self.config.target_vocab_size = self.data.tgt_vocab_size
    self.config.batch_size = 1

    logging.info("build model......")
    self.model = Seq2seq(config=self.config,
                         src_embedding=self.data.src_embedding_matrix,
                         tgt_embedding=self.data.tgt_embedding_matrix,
                         useTeacherForcing=False,
                         useAttention=True,
                         useBeamSearch=8)

    logging.info("init model......")
    # with tf.Session() as sess:
    sess = tf.Session()
    self.model.init(sess)
    checkpoint_path = tf.train.latest_checkpoint(self.config.checkpoint_dir)
    assert checkpoint_path, 'No checkpoint found'
    logging.info('Restore model from %s' % checkpoint_path)
    self.model.saver.restore(sess, checkpoint_path)
def __init__(self, args):
    # load dictionary and embedding file
    with open(config.embedding, "rb") as f0:
        embedding = pickle.load(f0)
        embedding = torch.tensor(embedding, dtype=torch.float).to(config.device)
    with open(config.entity_embedding, "rb") as f1:
        ent_embedding = pickle.load(f1)
        ent_embedding = torch.tensor(ent_embedding, dtype=torch.float).to(config.device)
    with open(config.relation_embedding, "rb") as f2:
        rel_embedding = pickle.load(f2)
        rel_embedding = torch.tensor(rel_embedding, dtype=torch.float).to(config.device)
    with open(config.word2idx_file, "rb") as f:
        word2idx = pickle.load(f)
    with open(config.ent2idx_file, "rb") as g:
        ent2idx = pickle.load(g)
    with open(config.rel2idx_file, "rb") as h:
        rel2idx = pickle.load(h)

    # train, dev loader
    print("load train data")
    self.train_loader = get_loader(config.train_src_file,
                                   config.train_trg_file,
                                   config.train_csfile,
                                   word2idx,
                                   use_tag=True,
                                   batch_size=config.batch_size,
                                   debug=config.debug)
    self.dev_loader = get_loader(config.dev_src_file,
                                 config.dev_trg_file,
                                 config.dev_csfile,
                                 word2idx,
                                 use_tag=True,
                                 batch_size=128,
                                 debug=config.debug)

    train_dir = "./save"
    self.model_dir = os.path.join(
        train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    self.model = Seq2seq(embedding, ent_embedding, rel_embedding)
    # self.model = nn.DataParallel(self.model)
    self.model = self.model.to(config.device)

    if len(args.model_path) > 0:
        print("load check point from: {}".format(args.model_path))
        state_dict = torch.load(args.model_path, map_location="cpu")
        self.model.load_state_dict(state_dict)

    params = self.model.parameters()
    self.lr = config.lr
    self.optim = optim.SGD(params, self.lr, momentum=0.8)
    # self.optim = optim.Adam(params)
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
def __init__(self, config: const.Config, mode: str, device: torch.device) -> None:
    self.config = config
    self.device = device
    self.seq2seq = Seq2seq(config, device=device)

    data = prepare.load_data(mode)
    data = prepare.process(data)
    self.data = data_prepare.Data(data, config.batch_size, config)
def test(dataset, args, test_id):
    if args.model == 'seq2seq':
        model = Seq2seq(32, args.lr, dataset.vocabsize, args.embed_dim, args.fs,
                        args.feat_dim, dataset.pretrain_wordemb, False,
                        pred_batch_size=1)
    else:
        print('choose a model to train! ')
        parser.print_help()
        return

    model.build_model()
    print(model.x)
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    latest_ckpt = tf.train.latest_checkpoint(args.loadpath)
    saver.restore(sess, latest_ckpt)
    print('restore from', latest_ckpt)

    fwrite = open(args.output, 'w')
    for step in range(0, dataset.iters):
        batch_x = dataset.next_batch_test()
        hist = np.zeros((1, dataset.vocabsize), dtype=np.float32)
        hist = list(hist)
        indices = np.ones((1, 1), dtype=np.float32)
        indices = list(indices)
        _, probs = sess.run([model.generated_words, model.probs],
                            feed_dict={model.x: batch_x,
                                       model.hist: hist,
                                       model.indices: indices})
        words = beamsearch(probs, dataset.wtoi)
        sentence = ''
        for idx in words:
            if idx != 0 and dataset.itow[idx] == '<eos>':
                break
            if idx != 0:
                sentence += dataset.itow[idx] + ' '
        # print the predictions for a few specific test IDs
        if test_id[step] in ('klteYv1Uv9A_27_33.avi', '5YJaS2Eswg0_22_26.avi',
                             'UbmZAe5u5FI_132_141.avi', 'JntMAcTlOF0_50_70.avi',
                             'tJHUH9tpqPg_113_118.avi'):
            print(test_id[step], sentence)
        fwrite.write('%s,%s\n' % (test_id[step], sentence))
    fwrite.close()
    print('save test result file as', args.output)
def __init__(self, config: const.Config, device: torch.device) -> None:
    self.config = config
    self.device = device

    self.seq2seq = Seq2seq(config, device=device, load_emb=True)
    self.loss = nn.NLLLoss()
    self.optimizer = torch.optim.Adam(self.seq2seq.parameters())

    data = prepare.load_data('train')
    data = prepare.process(data)
    self.data = data_prepare.Data(data, config.batch_size, config)

    self.epoch_number = config.epoch_number + 1
def __init__(self):
    self.DEVICE = torch.device("cuda" if config.is_cuda else "cpu")
    dataset = PairDataset(config.data_path,
                          max_src_len=config.max_src_len,
                          max_tgt_len=config.max_tgt_len,
                          truncate_src=config.truncate_src,
                          truncate_tgt=config.truncate_tgt)
    self.vocab = dataset.build_vocab(embed_file=config.embed_file)

    self.model = Seq2seq(self.vocab)
    self.stop_word = list(
        set([
            self.vocab[x.strip()]
            for x in open(config.stop_word_file).readlines()
        ]))
    self.model.load_model()
    self.model.to(self.DEVICE)
def test_conv():
    seed_everything()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Seq2seq().to(device)
    model.load(torch.load('{}/{}_{}.pth'.format(OUTPUT_DIR, FN, CKPT_NUM))['model'])
    sp = spm.SentencePieceProcessor()
    sp.Load(SP_PATH)

    test_data_txt = open(TEST_DATA_TXT_PATH, 'r', encoding='utf8')
    csv_name = './result_{}_{}_epoch{}.csv'.format(LOSS, LAMBDA, EPOCH_NUM)

    convs, tgts = [], []
    count = 0
    for line in test_data_txt:
        count += 1
        if count % 2 != 0:
            if LANGUAGE == 'JP':
                underline_replace = ''
            else:
                underline_replace = ' '
            convs.append({
                "src": line,
                "result_1": evaluate(line, sp, model, device).replace('▁', underline_replace),
                "result_2": evaluate(line, sp, model, device).replace('▁', underline_replace),
                "result_3": evaluate(line, sp, model, device).replace('▁', underline_replace),
            })
        else:
            tgts.append(line)

    col_name = ['src', 'result_1', 'result_2', 'result_3', 'tgt']
    try:
        with open(csv_name, 'w', newline='', encoding='utf8') as output_csv:
            csv_writer = csv.writer(output_csv)
            csv_writer.writerow(col_name)
            for conv, tgt in zip(convs, tgts):
                row_items = [conv['src'], conv['result_1'], conv['result_2'],
                             conv['result_3'], tgt]
                csv_writer.writerow(row_items)
    except OSError:
        print('---------- OS Error ----------')
def run_evaluate():
    seed_everything()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sp_model = spm.SentencePieceProcessor()
    sp_model.Load(SP_PATH)
    model = Seq2seq().to(device)
    model.load(
        torch.load('{}/{}_{}.pth'.format(OUTPUT_DIR, FN, CKPT_NUM))['model'])

    while True:
        s = input('You > ')
        if s == 'q':
            break
        print('BOT > ', end='')
        if LANGUAGE == 'JP':
            print(evaluate(s, sp_model, model, device).replace('▁', ''))
        else:
            print(evaluate(s, sp_model, model, device).replace('▁', ' '))
def __init__(self, args):
    self.logger = logging.getLogger('paragraph-level')

    # train, dev loader
    print("load train data")
    self.train_loader = get_loader(config.train_src_file,
                                   config.train_trg_file,
                                   config.train_ans_file,
                                   batch_size=config.batch_size,
                                   debug=config.debug,
                                   shuffle=True)
    self.dev_loader = get_loader(config.dev_src_file,
                                 config.dev_trg_file,
                                 config.dev_ans_file,
                                 batch_size=128,
                                 debug=config.debug)

    train_dir = os.path.join(config.file_path + "save", "seq2seq")
    self.model_dir = os.path.join(
        train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    self.model = Seq2seq()
    if config.use_gpu:
        self.model = self.model.to(config.device)

    if len(args.model_path) > 0:
        print("load check point from: {}".format(args.model_path))
        state_dict = torch.load(args.model_path, map_location="cpu")
        self.model.load_state_dict(state_dict)

    params = self.model.parameters()
    # freeze the BERT encoder and train only the remaining parameters
    bert_params = self.model.bert_encoder.named_parameters()
    for name, param in bert_params:
        param.requires_grad = False
    base_params = filter(lambda p: p.requires_grad, self.model.parameters())

    self.lr = config.lr
    self.optim = optim.SGD(base_params, self.lr, momentum=0.8)
    # self.optim = optim.Adam(params)
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
def __init__(self, model_path, output_dir):
    with open(config.word2idx_file, "rb") as f:
        word2idx = pickle.load(f)

    self.output_dir = output_dir
    self.test_data = open(config.test_trg_file, "r").readlines()
    self.data_loader = get_loader(config.test_src_file,
                                  config.test_trg_file,
                                  word2idx,
                                  batch_size=1,
                                  use_tag=config.use_tag,
                                  shuffle=False)

    self.tok2idx = word2idx
    self.idx2tok = {idx: tok for tok, idx in self.tok2idx.items()}
    self.model = Seq2seq(model_path=model_path)

    self.pred_dir = output_dir + "/generated.txt"
    self.golden_dir = output_dir + "/golden.txt"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
def __init__(self):
    # load Bert Tokenizer and pre-trained word embedding
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    embeddings = None
    self.model = Seq2seq(config.dropout, embeddings, use_tag=config.use_tag)

    train_dir = os.path.join("./save", "c2q")
    self.train_loader = self.get_data_loader("./squad/train-v1.1.json")
    self.dev_loader = self.get_data_loader("./squad/new_dev-v1.1.json")
    self.model_dir = os.path.join(
        train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    params = list(self.model.encoder.parameters()) \
        + list(self.model.decoder.parameters())
    self.lr = 0.1
    self.optim = optim.SGD(params, lr=self.lr)
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
def __init__(self, model_path, output_dir):
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    self.output_dir = output_dir
    self.golden_q_ids = None
    self.all_c_tokens = None
    self.all_answer_text = None
    self.data_loader = self.get_data_loader("./squad/new_test-v1.1.json")
    self.tok2idx = self.tokenizer.vocab
    self.idx2tok = {idx: tok for tok, idx in self.tok2idx.items()}

    self.model = Seq2seq(dropout=0.0,
                         model_path=model_path,
                         use_tag=config.use_tag)
    self.model.requires_grad = False
    self.model.eval_mode()

    self.src_file = output_dir + "/src.txt"
    self.pred_file = output_dir + "/generated.txt"
    self.golden_file = output_dir + "/golden.txt"
    self.ans_file = output_dir + "/answer.txt"
    self.total_file = output_dir + "/all_files.csv"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
def evaluate():
    args.beam_search_num = -1
    en_id2word_path = '../Dataset/en_id2word_dict.pkl'
    ch_id2word_path = '../Dataset/ch_id2word_dict.pkl'
    with open(en_id2word_path, 'rb') as f:
        en_id2word_dict = pkl.load(f)
    with open(ch_id2word_path, 'rb') as f:
        ch_id2word_dict = pkl.load(f)

    model = Seq2seq(args, ch_words_dict)
    evaluate_generator = batch_genetator(mode='eva')
    batch_index = 0
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, '../model/model.ckpt')
        try:
            while True:
                sou_sentences_list, sou_length_list, tag_sentences_list, tag_length_list = next(evaluate_generator)
                if len(sou_sentences_list[0]) > 90:
                    continue
                feed_dict = {model.sequence_input: sou_sentences_list,
                             model.sequence_length: sou_length_list,
                             model.target_input: tag_sentences_list,
                             model.target_length: tag_length_list}
                predict_ids = sess.run(model.out, feed_dict=feed_dict)
                for sentence_index in range(len(sou_sentences_list)):
                    sou_sentence = [en_id2word_dict[i] for i in sou_sentences_list[sentence_index]]
                    predict_sentence = [ch_id2word_dict[i] for i in predict_ids[sentence_index]]
                    tag_sentence = [ch_id2word_dict[i] for i in tag_sentences_list[sentence_index]]
                    print('sou_sentence: {}'.format(sou_sentence))
                    print('predict_sentence: {}'.format(predict_sentence))
                    print('tag_sentence: {}'.format(tag_sentence))
                batch_index = batch_index + 1
        except StopIteration:
            print('finish evaluating')
model_path = "checkpoint/model.ckpt" if __name__ == "__main__": print("(1)load data......") docs_source, docs_target = load_data(10) w2i_source, i2w_source = make_vocab(docs_source) w2i_target, i2w_target = make_vocab(docs_target) print("(2) build model......") config = Config() config.source_vocab_size = len(w2i_source) config.target_vocab_size = len(w2i_target) model = Seq2seq(config=config, w2i_target=w2i_target, useTeacherForcing=False, useAttention=True, useBeamSearch=3) print("(3) run model......") print_every = 100 max_target_len = 20 with tf.Session(config=tf_config) as sess: saver = tf.train.Saver() saver.restore(sess, model_path) source_batch, source_lens, target_batch, target_lens = get_batch( docs_source, w2i_source, docs_target, w2i_target, config.batch_size)
        source_batch.append(source_seq)
        target_batch.append(target_seq)

    return source_batch, source_lens, target_batch, target_lens


if __name__ == '__main__':
    print('loading data ...')
    doc_source = helper.load_file('./data/small_vocab_en.txt')
    doc_target = helper.load_file('./data/small_vocab_fr.txt')
    s_token2idx, s_idx2token = helper.load_vocab('./data/small_vocab_en.txt', helper.SOURCE_CODES)
    t_token2idx, t_idx2token = helper.load_vocab('./data/small_vocab_fr.txt', helper.TARGET_CODES)

    print('building model...')
    config = config()
    config.source_vocab_size = len(s_token2idx)
    config.target_vocab_size = len(t_token2idx)
    model = Seq2seq(config, t_token2idx, useTeacherForcing=True)

    batches = 10000
    print_every = 100

    print('run model...')
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        losses = []
        total_loss = 0
        for batch in range(batches):
            source_batch, source_lens, target_batch, target_lens = get_batch(
                doc_source, s_token2idx, doc_target, t_token2idx, config.batch_size)
            feed_dict = {
                model.seq_inputs: source_batch,
                model.seq_inputs_len: source_lens,
def step_one():
    if args.mode == 0:
        encoder_cell = 'lstm'
        decoder_cell = 'lstm'
    elif args.mode == 1:
        encoder_cell = 'gru'
        decoder_cell = 'gru'
    elif args.mode == 2:
        encoder_cell = 'gru'
        decoder_cell = 'lstm'
    else:
        encoder_cell = 'lstm'
        decoder_cell = 'gru'

    data_loader = DataLoader(args)

    embed_model = nn.Embedding(data_loader.vocab_len, 128)
    # embed_model.weight.data.copy_(torch.from_numpy(data_loader.word2vec.emb_vectors))
    encode_model = EncoderRNN(vocab_size=data_loader.vocab_len,
                              embed_model=embed_model,
                              emb_size=128,
                              hidden_size=256,
                              input_dropout_p=0.3,
                              dropout_p=0.4,
                              n_layers=2,
                              bidirectional=True,
                              rnn_cell=None,
                              rnn_cell_name=encoder_cell,
                              variable_lengths=True)
    decode_model = DecoderRNN_3(vocab_size=data_loader.vocab_len,
                                class_size=data_loader.classes_len,
                                embed_model=embed_model,
                                emb_size=128,
                                hidden_size=512,
                                n_layers=2,
                                rnn_cell=None,
                                rnn_cell_name=decoder_cell,
                                sos_id=data_loader.vocab_dict['END_token'],
                                eos_id=data_loader.vocab_dict['END_token'],
                                input_dropout_p=0.3,
                                dropout_p=0.4)
    seq2seq = Seq2seq(encode_model, decode_model)
    if args.cuda_use:
        seq2seq = seq2seq.cuda()

    weight = torch.ones(data_loader.classes_len)
    pad = data_loader.decode_classes_dict['PAD_token']
    loss = NLLLoss(weight, pad)

    st = SupervisedTrainer(vocab_dict=data_loader.vocab_dict,
                           vocab_list=data_loader.vocab_list,
                           decode_classes_dict=data_loader.decode_classes_dict,
                           decode_classes_list=data_loader.decode_classes_list,
                           cuda_use=args.cuda_use,
                           loss=loss,
                           print_every=10,
                           teacher_schedule=False,
                           checkpoint_dir_name=args.checkpoint_dir_name)

    print('start training')
    st.train(model=seq2seq,
             data_loader=data_loader,
             batch_size=128,
             n_epoch=300,
             template_flag=True,
             resume=args.resume,
             optimizer=None,
             mode=args.mode,
             teacher_forcing_ratio=args.teacher_forcing_ratio,
             post_flag=args.post_flag)
import logging

import torch
import torch.optim as optim
import sentencepiece as spm
from torch.utils.data import DataLoader

from config import *
from model import Seq2seq, SCELoss, ITFLoss, INFLoss
from utils import DialogDataset, get_optimizer, seed_everything, one_cycle

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    logging.info('---------- Initializing ----------')
    seed_everything()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    start_epoch = CKPT_NUM

    logging.info('---------- Define Models ----------')
    model = Seq2seq().to(device)
    if MULTI:
        model = torch.nn.DataParallel(model)
    sp = spm.SentencePieceProcessor()
    sp.Load(SP_PATH)

    logging.info('---------- Define Loss and Optimizer ----------')
    if LOSS == 'SCE':
        criterion = SCELoss()
    elif LOSS == 'ITF':
        criterion = ITFLoss(device, _lambda=LAMBDA)
    else:
        criterion = INFLoss(device, _lambda=LAMBDA)
    _opt = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.98), eps=1e-9)
    optimizer = get_optimizer(_opt)
def train(dataset, args, retrain):
    if args.model == 'seq2seq':
        model = Seq2seq(args.bs, args.lr, dataset.vocabsize, args.embed_dim,
                        args.fs, args.feat_dim, dataset.pretrain_wordemb)
    else:
        print('choose a model to train! ')
        parser.print_help()
        return

    model.build_model()
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if retrain == True:
        latest_ckpt = tf.train.latest_checkpoint(args.loadpath)
        saver.restore(sess, latest_ckpt)
        print('restore from', latest_ckpt)

    print('Start training, method=%s, lr=%f, epoch=%d, comment=%s' %
          (model.name, args.lr, args.ep, args.comment))
    path_args = args.savepath.split('/')[0] + '/output.log'
    fwrite = open(path_args, 'w')

    preloss = 1000.
    earlystop = 0
    cutting_len = 8
    pre_loss = 10.0
    roll_sch = 1.0
    for ep in range(1, args.ep + 1):
        correct = []
        train_total_loss = 0
        start_time = time.time()
        if pre_loss < 0.65 and cutting_len < 45:  # prevent overfitting
            cutting_len += 10
            dataset.random_sample(4, int(cutting_len))
            print('re sample, size:', dataset.size, ', iters:', dataset.iters,
                  'cutting_size:', cutting_len)
        else:
            dataset.random_sample(4, int(cutting_len))

        for step in range(0, dataset.iters):  # total run 21120 samples
            batch_x, batch_y = dataset.next_batch()  # dataset.next_batch()
            hist = np.zeros((batch_x.shape[0], dataset.vocabsize), dtype=np.float32)
            hist = list(hist)
            indices = np.ones((batch_x.shape[0], 1), dtype=np.float32)
            indices = list(indices)
            batch_y_mask = np.zeros((batch_y.shape[0], batch_y.shape[1]))
            nonzeros = list(map(lambda x: (x != 0).sum(), batch_y))
            for ind, row in enumerate(batch_y_mask):
                row[:nonzeros[ind]] = 1
            if step == 0 and ep == 1:
                print(batch_x.shape, batch_y.shape)
            roll = np.random.rand()
            _, loss = sess.run([model.train_op, model.loss_op],
                               feed_dict={model.x: batch_x,
                                          model.caption: batch_y,
                                          model.caption_mask: batch_y_mask,
                                          model.prob_sch: roll_sch,
                                          model.roll: roll,
                                          model.hist: hist,
                                          model.indices: indices})
            # print(logit_words)
            train_total_loss += loss
            if step % 10 == 0 or step == 1:
                # pred, current_embed = sess.run([model.pred, model.current_embed],
                #     feed_dict={model.x: batch_x, model.caption: batch_y})
                # print(len(pred))
                # print(pred[0])
                # print('================================\n')
                # print(len(current_embed))
                # print(current_embed[0])
                print("Epoch: %2d, Step: %7d/%7d, Train_loss: %.4f, roll: %2.3f, roll_sch: %2.3f "
                      % (ep, step, dataset.iters, loss, roll, roll_sch), end='\r')

        train_total_loss /= dataset.iters
        pre_loss = train_total_loss
        print("Epoch: %2d, Step: %7d/%7d, Train_loss: %2.4f "
              % (ep, step, dataset.iters, train_total_loss), end='\r')

        test_total_loss = 0
        # total_iters = 75
        # totalsample = dataset.random_sample(2400)
        # total_iters = np.ceil(totalsample/args.bs).astype(np.int32)
        dataset.random_sample(1)
        for step in range(0, dataset.iters):
            batch_x, batch_y = dataset.next_batch()  # dataset.next_batch_val()
            hist = np.zeros((batch_x.shape[0], dataset.vocabsize), dtype=np.float32)
            hist = list(hist)
            indices = np.ones((batch_x.shape[0], 1), dtype=np.float32)
            indices = list(indices)
            # batch_y = np.column_stack((batch_y, np.zeros([len(batch_y), 1]))).astype(int)
            batch_y_mask = np.zeros((batch_y.shape[0], batch_y.shape[1]))
            nonzeros = list(map(lambda x: (x != 0).sum(), batch_y))
            for ind, row in enumerate(batch_y_mask):
                row[:nonzeros[ind]] = 1
            roll = 0.0
            loss = sess.run(model.loss_op,
                            feed_dict={model.x: batch_x,
                                       model.caption: batch_y,
                                       model.caption_mask: batch_y_mask,
                                       model.prob_sch: roll_sch,
                                       model.roll: roll,
                                       model.hist: hist,
                                       model.indices: indices})
            test_total_loss += loss
        test_total_loss /= dataset.iters

        end_time = time.time()
        print("Epoch: %2d, take_time: %4.1fs, Train_loss: %2.4f, Test_loss: %2.4f "
              % (ep, (end_time - start_time), train_total_loss, test_total_loss))
        fwrite.write("Epoch: %2d, take_time: %4.1fs, Train_loss: %2.4f, Test_loss: %2.4f\n"
                     % (ep, (end_time - start_time), train_total_loss, test_total_loss))
        saver.save(sess, args.savepath, global_step=ep)
        if ep > 50:
            roll_sch *= 0.99
        if test_total_loss < 0.55:
            print('earlystop at epoch %d' % (ep))
            break

    print('Done')
    print("Model saved in file: %s\n" % args.savepath)
    fwrite.write('Done')
    fwrite.close()
            optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))  # update parameters
            if (batch + 1) % 50 == 0:
                print('[Epoch{} Batch{}] loss:{:.3f}'.format(
                    epoch + 1, batch + 1, loss.numpy()))
        manager.save()  # save a checkpoint after each epoch
        print('Epoch{} Loss: {:.5f}'.format(epoch + 1, np.mean(epoch_loss)))
        print('***************')


if __name__ == '__main__':
    train_X = np.loadtxt('/data/train_X.txt', dtype='int')
    train_Y = np.loadtxt('/data/train_Y.txt', dtype='int')
    test_X = np.loadtxt('/data/test_X.txt', dtype='int')
    index2word, word2index, embedding_matrix = load_vocab_embedding_matrix()

    config = Configurations()
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_X, train_Y)).batch(config.batch_size)

    model = Seq2seq(vocab_size=embedding_matrix.shape[0],
                    embedding_dim=embedding_matrix.shape[1],
                    embedding_matrix=embedding_matrix,
                    gru_units=config.hid_dim,
                    dropout=config.dropout)

    training(model, train_dataset, config.epochs, config.learning_rate,
             word2index['<PAD>'])
tf_config.gpu_options.allow_growth = True
model_path = "checkpoint/model.ckpt"

if __name__ == "__main__":
    print("(1) load data......")
    docs_source = ['new jersey is usually hot during autumn , and it is never quiet in winter .\n']
    docs_target = ["new jersey est généralement chaud pendant l' automne , et il est jamais calme en hiver .\n"]
    w2i_source, i2w_source = helper.load_vocab('./data/small_vocab_en.txt', helper.SOURCE_CODES)
    w2i_target, i2w_target = helper.load_vocab('./data/small_vocab_fr.txt', helper.TARGET_CODES)

    print("(2) build model......")
    config = config()
    config.source_vocab_size = len(w2i_source)
    config.target_vocab_size = len(w2i_target)
    model = Seq2seq(config, w2i_target, useTeacherForcing=False)

    print("(3) run model......")
    print_every = 100
    max_target_len = 20

    with tf.Session(config=tf_config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, model_path)

        source_batch, source_lens, target_batch, target_lens = get_batch(
            docs_source, w2i_source, docs_target, w2i_target, config.batch_size)

        feed_dict = {
            model.seq_inputs: source_batch,
            model.seq_inputs_len: source_lens,
import sys
sys.path.append('/home/demolwang/demolwang/math_word_problem/critical-based/seq2seq_v2/src')
from model import EncoderRNN, DecoderRNN_1, Seq2seq
import torch
from torch.autograd import Variable
import torch.nn as nn
import pdb

embed_model = nn.Embedding(1000, 100)
encode_model = EncoderRNN(1000, embed_model, 100, 128, 0, 0, 4, True, None, 'lstm', True)
decode_model = DecoderRNN_1(1000, 10, embed_model, 100, 256, 3, None, 'gru', 1, 0, 0, 0)
seq2seq = Seq2seq(encode_model, decode_model)

input = Variable(torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]))
target = Variable(torch.LongTensor([[4, 3, 2], [11, 3, 4]]))
lengths = [4, 4]

dol, dh, ssl = seq2seq(input, lengths, target, 0, 3)
pdb.set_trace()
def run(user_question, seq2seq=Seq2seq(), proc=DataProcess()):
    pro_sent = proc.prepocess_sentence(user_question)
    txt_to_idx = seq2seq.convert_text_to_index(pro_sent, proc.word2idx)
    pred = seq2seq.predict_model()
    sentence = seq2seq.idx_to_sentence(txt_to_idx, pred)
    return sentence_spacing(sentence)
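# Hypothetical usage sketch for run() above, assuming Seq2seq() and
# DataProcess() can be constructed with no arguments (as the default
# parameters imply) and that sentence_spacing() is importable from the same
# module. Reusing one model/processor instance across calls avoids rebuilding
# them for every question; the example questions are illustrative only.
seq2seq = Seq2seq()
proc = DataProcess()
for question in ["how do I reset my password", "what are your opening hours"]:
    answer = run(question, seq2seq=seq2seq, proc=proc)
    print("Q:", question)
    print("A:", answer)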
def main():
    logging.info("(1) load data......")
    data = datasets.Lang8v1()
    data.process()
    data.show()
    # docs_source, docs_target = load_data("")
    # w2i_source, i2w_source = make_vocab(docs_source)
    # w2i_target, i2w_target = make_vocab(docs_target)

    config = Config()
    config.source_vocab_size = data.src_vocab_size
    config.target_vocab_size = data.tgt_vocab_size

    logging.info("(2) build model......")
    model = Seq2seq(config=config,
                    src_embedding=data.src_embedding_matrix,
                    tgt_embedding=data.tgt_embedding_matrix,
                    useTeacherForcing=config.useTeacherForcing,
                    useAttention=config.useAttention)

    logging.info("(3) run model......")
    with tf.Session(config=tf_config) as sess:
        tf.summary.FileWriter('graph', sess.graph)
        model.init(sess)

        best_epoch = 0
        previous_losses = []
        exp_loss = None
        exp_length = None
        exp_norm = None
        total_iters = 0
        start_time = time.time()
        batches_per_epoch = data.nb_train / config.batch_size
        time_per_iter = None

        checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
        # last_epoch = -int(checkpoint_path[checkpoint_path.rfind('-'):])
        last_epoch = findLatestCheckpointBatch(config.checkpoint_dir)
        # checkpoint_path = os.path.join('checkpoint', "best.ckpt-2")
        logging.info('last epoch: %s' % last_epoch)
        if debug:
            exit()
        if os.path.exists('checkpoint/checkpoint'):
            logging.info('Restore model from %s' % checkpoint_path)
            model.saver.restore(sess, checkpoint_path)
        else:
            logging.info("Created model with fresh parameters.")
            exit()
        if debug:
            exit()

        for epoch in range(last_epoch + 1, config.epochs):
            epoch_tic = time.time()
            current_step = 0
            # for source_tokens, source_mask, target_tokens, target_mask in pair_iter(x_train, y_train, FLAGS.batch_size, FLAGS.num_layers):
            for batch_vars in data.get_batch(config.batch_size, 'train'):
                src_batch, tgt_batch, src_lens, tgt_lens = batch_vars
                # Get a batch and make a step.
                tic = time.time()
                loss, grad_norm, param_norm = model.train(*batch_vars)
                toc = time.time()
                iter_time = toc - tic
                # total_iters += np.sum(target_mask)
                # tps = total_iters / (time.time() - start_time)
                current_step += 1
                # if current_step > 5: break
                time_per_iter = (time.time() - epoch_tic) / current_step

                # lengths = np.sum(target_mask, axis=0)
                mean_length = np.mean(src_lens)
                std_length = np.std(src_lens)

                if not exp_loss:
                    exp_loss = loss
                    exp_length = mean_length
                    exp_norm = grad_norm
                else:
                    exp_loss = 0.99 * exp_loss + 0.01 * loss
                    exp_length = 0.99 * exp_length + 0.01 * mean_length
                    exp_norm = 0.99 * exp_norm + 0.01 * grad_norm

                loss = loss / mean_length

                if current_step == 1 or current_step % config.print_every == 0:
                    logging.info(
                        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
                    logging.info(
                        'epoch %d/%d, batch %d/%.0f\n loss %f, exp_loss %f, grad norm %f, param norm %f, length mean/std %f/%f'
                        % (epoch, config.epochs, current_step, batches_per_epoch,
                           loss, exp_loss / exp_length, grad_norm, param_norm,
                           mean_length, std_length))
                    logging.info(
                        'Cost Time: {}, ETA: {}, iter time: {:.3f} sec\n'.format(
                            sec2str(toc - start_time),
                            sec2str(time_per_iter * (batches_per_epoch - current_step)),
                            (time_per_iter)))

                    predict_batch = model.predict(*batch_vars)
                    logging.info('-' * 80)
                    for i in range(3):
                        logging.info("[src]: " + ' '.join([
                            data.src_i2w[num] for num in src_batch[i]
                            if data.src_i2w[num] != PAD
                        ]))
                        logging.info("[tgt]: " + ' '.join([
                            data.tgt_i2w[num] for num in tgt_batch[i]
                            if data.tgt_i2w[num] != PAD
                        ]))
                        logging.info("[prd]: " + ' '.join([
                            data.tgt_i2w[num] for num in predict_batch[i]
                            if data.tgt_i2w[num] != PAD
                        ]))
                        logging.info('-')
                    logging.info('-' * 80)
                    logging.info("")

                if current_step % config.save_every == 0:
                    logging.info('Saving model to {}'.format(checkpoint_path))
                    model.saver.save(sess, checkpoint_path)

            epoch_toc = time.time()
            logging.info('Cost Time: {}, Total ETA: {}\n'.format(
                sec2str(epoch_toc - start_time),
                sec2str((epoch_toc - epoch_tic) * (config.epochs - epoch))))

            ## Validate
            # valid_cost = validate(model, sess, x_dev, y_dev)
            logging.info('validation ...')
            loss_dev = []
            tot_iter = data.nb_dev / config.batch_size
            # nb_dev = config.batch_size*tot_iter
            for i, dev_batch in enumerate(data.get_batch(config.batch_size, 'dev')):
                t = model.test(*dev_batch)
                loss_dev.append(t)
                if i % max(1, tot_iter // 20) == 0:
                    logging.info(' {:.2f}% loss: {:.2f}'.format(
                        (i + 1) * 100 / tot_iter, t))
                if i + 1 == tot_iter:
                    break
            valid_loss = np.mean(loss_dev)
            logging.info("Epoch %d Validation cost: %.2f time: %s"
                         % (epoch, valid_loss, sec2str(epoch_toc - epoch_tic)))

            ## Checkpoint
            checkpoint_path = os.path.join(config.checkpoint_dir, "best.ckpt")
            if len(previous_losses) > 2 and valid_loss > previous_losses[-1]:
                pass
                # logging.info("Annealing learning rate by %f" % FLAGS.learning_rate_decay_factor)
                # sess.run(model.learning_rate_decay_op)
                # model.saver.restore(sess, checkpoint_path + ("-%d" % best_epoch))
            # else:
            logging.info('Saving checkpoint to {}'.format(checkpoint_path))
            previous_losses.append(valid_loss)
            # best_epoch = epoch
            model.saver.save(sess, checkpoint_path, global_step=epoch)
            with open('checkpoint/log', 'a') as f:
                f.write('{:02d}: {:.6f}\n'.format(epoch, valid_loss))