def predict(args, model_name, restore_path):
    intent_dict = pkl.load(open(PATH + "/intent_dict.p", 'rb'))
    args.id2intent = {v: k for k, v in intent_dict.items()}
    vocab = WordVocab.load_vocab(PATH + args.vocab_path)
    poss_vocab = pkl.load(open(PATH + "/poss_vocab.p", 'rb'))

    args.num_layers = 1
    args.vocab_size = len(vocab)
    args.class_nums = len(intent_dict)
    args.poss_num = len(poss_vocab)

    if args.use_pre_train_emb:
        vocab_emb = pkl.load(open('%s_vocab_emb.p' % args.task_name, 'rb'))
        args.vocab_emb = vocab_emb

    if model_name == 'BaseLSTM':
        model = BaseLstm(args, 'BaseLstm')
    elif model_name == 'BaseLstmStruct':
        model = BaseLstmStruct(args, 'BaseLstmStruct')
    elif model_name == 'BaseTransformerStruct':
        model = BaseTransformerStruct(args, 'BaseTransformerStruct')
    elif model_name == 'cnn':
        model = Cnn(args, 'cnn')
    elif model_name == 'TransformerCNN':
        model = TransformerCNN(args, 'TransformerCNN')
    elif model_name == 'LEAM':
        model = LEAM(args, 'LEAM')

    args.model_name = model_name
    model.build_placeholder()
    model.build_model()

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess = model.restore(sess, restore_path)
        pdd = PredictDataDeal(vocab=vocab,
                              seq_len=args.seq_len,
                              poss_vocab=poss_vocab,
                              vocab_char=None)
        while True:
            sent = input("Input: ")
            t1, t1_len, poss = pdd.predict(sent)
            pre_prob, pre_label = model.predict(sess, t1, t1_len, poss)
            print(args.id2intent[pre_label[0]], np.max(pre_prob, -1))
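# Usage sketch (not part of the original script): one way predict() might be
# invoked after training. The config path, model name, and checkpoint path are
# illustrative assumptions, not values taken from this repository.
if __name__ == '__main__':
    example_args = dict_to_object(dict(json.load(open('./Configs/BaseLstm.config', 'r'))))
    predict(example_args,
            model_name='BaseLSTM',
            restore_path=PATH + "/output/BaseLSTM_demo_2kw.ckpt")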
from dataset.dataset import BERTDatasetCreator
from dataset import WordVocab
from torch.utils.data import DataLoader
import argparse
import tqdm

parser = argparse.ArgumentParser()
parser.add_argument("-v", "--vocab_path", required=True, type=str)
parser.add_argument("-c", "--corpus_path", required=True, type=str)
parser.add_argument("-e", "--encoding", default="utf-8", type=str)
parser.add_argument("-o", "--output_path", required=True, type=str)
args = parser.parse_args()

word_vocab = WordVocab.load_vocab(args.vocab_path)
builder = BERTDatasetCreator(corpus_path=args.corpus_path,
                             vocab=word_vocab,
                             seq_len=None,
                             encoding=args.encoding)

with open(args.output_path, 'w', encoding=args.encoding) as f:
    for index in tqdm.tqdm(range(len(builder)), desc="Building Dataset", total=len(builder)):
        data = builder[index]
        output_form = "%s\t%s\t%s\t%s\t%d\n"
        t1_text, t2_text = [
            " ".join(t) for t in [data["t1_random"], data["t2_random"]]
        ]
        t1_label, t2_label = [
            " ".join([str(i) for i in label])
            for label in [data["t1_label"], data["t2_label"]]
        ]
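# Example invocation (the script name "build_dataset.py" and the paths are
# illustrative assumptions):
#
#   python build_dataset.py -v data/vocab.pkl -c data/corpus.txt -o data/corpus.bert.tsv
#
# Each corpus line yields one tab-separated training example formatted with
# output_form: the two masked sentences, their per-token labels, and an integer label.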
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    parser.add_argument("-t", "--valid_dataset", required=True, type=str, help="validation set for evaluating the train set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="path of the built vocab model")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="output/bert.model")
    parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker size")
    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true or false")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=[0, 1, 2, 3], help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="loading on memory: true or false")
    args = parser.parse_args()

    paths = Paths(args.output_path)

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", vocab.vocab_size)
    args.char_nums = vocab.vocab_size

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Valid Dataset", args.valid_dataset)
    valid_dataset = BERTDataset(args.valid_dataset, vocab, on_memory=args.on_memory) \
        if args.valid_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=hp.batch_size,
                                   collate_fn=lambda batch: collate_mlm(batch),
                                   num_workers=args.num_workers, shuffle=False)
    valid_data_loader = DataLoader(valid_dataset, batch_size=hp.batch_size,
                                   collate_fn=lambda batch: collate_mlm(batch),
                                   num_workers=args.num_workers, shuffle=False) \
        if valid_dataset is not None else None

    print("Building BERT model")
    bert = BERT(embed_dim=hp.embed_dim, hidden=hp.hidden, args=args)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, vocab.vocab_size,
                          train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          args=args, path=paths)

    print("Training Start")
    trainer.train()
def __init__(self, model_name='/'):
    self.sess_dict = {}
    self.queryObj_dict = {}
    config = tf.ConfigProto(allow_soft_placement=True)

    model_name_meta = '%s.meta' % model_name
    saver = tf.train.import_meta_graph(model_name_meta)  # load the graph structure
    graph = tf.get_default_graph()  # get the current graph so variables can be restored later
    tensor_name_list = [
        tensor.name for tensor in graph.as_graph_def().node
    ]  # names of all nodes in the current graph
    for ele in tensor_name_list:
        if str(ele).__contains__('out_softmax'):
            print(ele)

    args_dict = json.load(open('./Configs/BaseLstm.config', 'r'))
    self.args = dict_to_object(dict(args_dict))
    self.label_vocab1 = pkl.load(open("./label_vocab1.p", 'rb'))
    self.label_vocab2 = pkl.load(open("./label_vocab2.p", 'rb'))
    self.label_vocab3 = pkl.load(open("./label_vocab3.p", 'rb'))
    self.vocab = WordVocab.load_vocab(PATH + self.args.vocab_path)
    self.poss_vocab = pkl.load(open("./poss_vocab.p", 'rb'))

    with tf.device('/device:GPU:%s' % 0):
        self.sent_token = graph.get_tensor_by_name('sent1_token:0')
        self.sent_char = graph.get_tensor_by_name('sent1_char:0')
        self.sent_word_re = graph.get_tensor_by_name('sent_word_re:0')
        self.sent_word_re_char = graph.get_tensor_by_name('sent_word_re_char:0')
        self.sent_len = graph.get_tensor_by_name('sent1_len:0')
        self.sent_len_char = graph.get_tensor_by_name('sent_len_char:0')
        self.sent_len_re = graph.get_tensor_by_name('sent1_len_re:0')
        self.sent_len_re_char = graph.get_tensor_by_name('sent1_len_re_char:0')
        self.sent_token_neg = graph.get_tensor_by_name('sent1_token_neg:0')
        self.sent_len_neg = graph.get_tensor_by_name('sent1_len_neg:0')
        self.sent_char_neg = graph.get_tensor_by_name('sent_char_neg:0')
        self.sent_char_len_neg = graph.get_tensor_by_name('sent_char_len_neg:0')
        self.key_emb = graph.get_tensor_by_name('key_emb:0')
        self.dropout = graph.get_tensor_by_name('dropout:0')

        name = model_name.split('/')[-1].split('_')[0].replace("BaseLSTM", "BaseLstm")
        try:
            self.soft_out_1 = graph.get_tensor_by_name('%s_enc_0/_0/softmax/Softmax:0' % name)
            self.soft_out_2 = graph.get_tensor_by_name('%s_enc_1/_1/softmax/Softmax:0' % name)
            self.soft_out_3 = graph.get_tensor_by_name('%s_enc_2/_2/softmax/Softmax:0' % name)
        except Exception:
            self.soft_out_1 = graph.get_tensor_by_name('%s_enc_0/_0/out_softmax/softmax/Softmax:0' % name)
            self.soft_out_2 = graph.get_tensor_by_name('%s_enc_1/_1/out_softmax/softmax/Softmax:0' % name)
            self.soft_out_3 = graph.get_tensor_by_name('%s_enc_2/_2/out_softmax/softmax/Softmax:0' % name)
        try:
            self.smentic_out_1 = graph.get_tensor_by_name('%s_enc_0/_0/semantic_out/concat:0' % name)
            self.smentic_out_2 = graph.get_tensor_by_name('%s_enc_1/_1/semantic_out/concat:0' % name)
            self.smentic_out_3 = graph.get_tensor_by_name('%s_enc_2/_2/semantic_out/concat:0' % name)
        except Exception:
            pass

    self.sess = tf.Session(config=config)
    saver.restore(self.sess, '%s' % model_name)
    self.pdd = PredictDataDeal(vocab=self.vocab,
                               seq_len=self.args.seq_len,
                               poss_vocab=self.poss_vocab)
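# Usage sketch (assumed, not part of the original class): evaluating the three
# restored softmax heads for one sentence. The feed_dict mapping from
# PredictDataDeal outputs to placeholders is an illustrative assumption.
def run_softmax_heads(predictor, sent):
    t1, t1_len, poss = predictor.pdd.predict(sent)
    feed = {predictor.sent_token: t1,
            predictor.sent_len: t1_len,
            predictor.dropout: 1.0}
    return predictor.sess.run(
        [predictor.soft_out_1, predictor.soft_out_2, predictor.soft_out_3],
        feed_dict=feed)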
def test():
    os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--train_dataset", type=str, help="train dataset for training BERT", default='./data/corpus_pre.txt')
    parser.add_argument("-t", "--valid_dataset", type=str, help="validation set for evaluating the train set", default='./data/corpus_pre.txt')
    parser.add_argument("-v", "--vocab_path", type=str, help="path of the built vocab model", default='./data/vocab.test')
    parser.add_argument("-o", "--output_path", type=str, help="output/bert.model", default='./output')
    parser.add_argument("-w", "--num_workers", type=int, default=0, help="dataloader worker size")
    parser.add_argument("--with_cuda", type=bool, default=False, help="training with CUDA: true or false")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=[0, 1, 2, 3], help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="loading on memory: true or false")
    parser.add_argument('--seed', type=int, default=42, help="random seed for initialization")
    args = parser.parse_args()
    set_seed(args)
    paths = Paths(args.output_path)

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", vocab.vocab_size)
    args.char_nums = vocab.vocab_size

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Valid Dataset", args.valid_dataset)
    valid_dataset = BERTDataset(args.valid_dataset, vocab, on_memory=args.on_memory) \
        if args.valid_dataset is not None else None

    print("Creating Dataloader")
    # the training corpus is already sorted by length
    train_data_loader = DataLoader(train_dataset, batch_size=hp.batch_size,
                                   collate_fn=lambda batch: collate_mlm(batch),
                                   num_workers=args.num_workers, shuffle=False)
    valid_data_loader = DataLoader(valid_dataset, batch_size=hp.batch_size,
                                   collate_fn=lambda batch: collate_mlm(batch),
                                   num_workers=args.num_workers, shuffle=False) \
        if valid_dataset is not None else None

    print("Load BERT model")
    bert = torch.load('./output/model_bert/bert_ep10.model')
    model = torch.load('./output/model_mlm/mlm_ep10.model')

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, vocab.vocab_size, model,
                          train_dataloader=train_data_loader,
                          test_dataloader=valid_data_loader,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          args=args, path=paths)

    print("Training Start")
    trainer.evaluate_and_print(vocab)
import argparse

from dataset import WordVocab

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--corpus_path", required=True, type=str)
parser.add_argument("-o", "--output_path", required=True, type=str)
parser.add_argument("-s", "--vocab_size", type=int, default=None)
parser.add_argument("-e", "--encoding", type=str, default="utf-8")
parser.add_argument("-m", "--min_freq", type=int, default=1)
args = parser.parse_args()

with open(args.corpus_path, "r", encoding=args.encoding) as f:
    vocab = WordVocab(f, max_size=args.vocab_size, min_freq=args.min_freq)

vocab.save_vocab(args.output_path)
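# Example invocation (the script name "build_vocab.py" and the paths are
# illustrative assumptions):
#
#   python build_vocab.py -c data/corpus.txt -o data/vocab.pkl -s 30000 -m 2
#
# This reads the corpus, keeps at most 30000 tokens that occur at least twice,
# and saves the resulting WordVocab to data/vocab.pkl for later load_vocab calls.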
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--train_dataset", required=True, type=str, help="train dataset for training BERT")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluating the train set")
    parser.add_argument("-v", "--vocab_path", required=True, type=str, help="vocab model path built with bert-vocab")
    parser.add_argument("-o", "--output_path", required=True, type=str, help="e.g. output/bert.model")

    parser.add_argument("-hs", "--hidden", type=int, default=256, help="hidden size of transformer model")
    parser.add_argument("-l", "--layers", type=int, default=8, help="number of layers")
    parser.add_argument("-a", "--attn_heads", type=int, default=8, help="number of attention heads")
    parser.add_argument("-s", "--seq_len", type=int, default=20, help="maximum sequence length")

    parser.add_argument("-b", "--batch_size", type=int, default=64, help="batch size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")

    parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true or false")
    parser.add_argument("--log_freq", type=int, default=10, help="print loss every n iterations")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=None, help="CUDA device ids")
    parser.add_argument("--on_memory", type=bool, default=True, help="loading on memory: true or false")

    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    parser.add_argument("--adam_weight_decay", type=float, default=0.01, help="weight_decay of adam")
    parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
    parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")

    args = parser.parse_args()

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    print("Building BERT model")
    bert = BERT(len(vocab), hidden=args.hidden, n_layers=args.layers, attn_heads=args.attn_heads)

    print("Creating BERT Trainer")
    trainer = BERTTrainer(bert, len(vocab),
                          train_dataloader=train_data_loader, test_dataloader=test_data_loader,
                          lr=args.lr, betas=(args.adam_beta1, args.adam_beta2),
                          weight_decay=args.adam_weight_decay,
                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices,
                          log_freq=args.log_freq)

    print("Training Start")
    for epoch in range(args.epochs):
        trainer.train(epoch)
        trainer.save(epoch, args.output_path)

        if test_data_loader is not None:
            trainer.test(epoch)
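# Example invocation (the script name and file paths are illustrative assumptions;
# the flags come from the argparse definitions above):
#
#   python train_bert.py -c data/corpus.train.txt -t data/corpus.test.txt \
#       -v data/vocab.pkl -o output/bert.model -hs 256 -l 8 -a 8 -s 20 -b 64 -e 10
#
# Each epoch trains over the train loader, saves a checkpoint to the output path,
# and runs evaluation on the test loader when a test dataset is supplied.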
corpus_path = os.path.join(args.dir_path, 'paths.csv')
contexts_path = os.path.join(args.dir_path, 'path_contexts.csv')

nodes_vocab = pd.read_csv(node_path)
nodes_vocab['node_type'] = nodes_vocab.apply(
    lambda x: '_'.join(x['node_type'].split()), axis=1)
node_dict = nodes_vocab.set_index('id').to_dict(orient='dict')
node_dict = node_dict['node_type']

paths = pd.read_csv(corpus_path)
paths = paths.apply(lambda x: ' '.join(
    [node_dict.get(int(i), '<unk>') for i in x['path'].split(' ')]), axis=1)
path_list = paths.values.tolist()

vocab = WordVocab(path_list, max_size=args.vocab_size, min_freq=args.min_freq)
print("VOCAB SIZE:", len(vocab))
vocab.save_vocab(os.path.join(args.output_dir_path, 'path_vocab.pickle'))

with open(os.path.join(args.output_dir_path, 'nl_vocab.pickle'), 'rb') as f:
    nl_vocab = pickle.load(f)


def process_tokens(x):
    split_list = split_camel(x['token'], x)
    split_list = nl_vocab.to_seq(split_list)
    return ' '.join([str(i) for i in split_list])


tokens_paths = [
    os.path.join(args.dir_path, 'tokens.csv'),
def build_vocab(corpus_path, vocab_path, mode):
    with open(corpus_path, "r", encoding='utf-8') as f:
        vocab = WordVocab(f, max_size=None, min_freq=1, mode=mode)
    print("VOCAB SIZE:", len(vocab))
    vocab.save_vocab(vocab_path)
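# Usage sketch (file paths are illustrative; the 'word_char' mode is the one
# used by train() below):
#
#   build_vocab('./data/train.txt', PATH + '/data/vocab.pkl', mode='word_char')
#
# This builds a WordVocab over the whole corpus with no size cap (max_size=None,
# min_freq=1) and saves it to vocab_path for later WordVocab.load_vocab calls.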
def train(args, model_name):
    if not os.path.exists(PATH + args.output_path):
        os.mkdir(PATH + args.output_path)

    _logger.info("new_vocab:%s" % args.new_vocab)
    _logger.info("use_tfrecord:%s" % args.use_tfrecord)
    _logger.info("train_dataset:%s" % args.train_dataset)
    _logger.info("test_dataset:%s" % args.test_dataset)
    _logger.info("task_name:%s" % args.task_name)
    _logger.info("model_name:%s" % args.model_name)
    _logger.info("new_tfrecord:%s" % args.new_tfrecord)
    _logger.info("restore_model:%s" % args.restore_model)
    _logger.info("use_pre_train_emb:%s" % args.use_pre_train_emb)

    _logger.info("build label vocab")
    if args.new_label_vocab:
        intent_dict = build_label_vocab(args.train_dataset, args.test_dataset)
        _logger.info("%s %s" % (intent_dict, len(intent_dict)))
        pkl.dump(intent_dict, open(PATH + "/intent_dict.p", 'wb'))
    intent_dict = pkl.load(open(PATH + "/intent_dict.p", 'rb'))
    args.id2intent = {v: k for k, v in intent_dict.items()}

    # load word vocab
    if not args.new_vocab and os.path.exists(PATH + args.vocab_path):
        _logger.info("Loading Vocab: %s" % (PATH + args.vocab_path))
        vocab = WordVocab.load_vocab(PATH + args.vocab_path)
    else:
        _logger.info("build vocab")
        build_vocab(args.train_dataset, PATH + args.vocab_path, mode='word_char')
        _logger.info("Loading Vocab: %s" % (PATH + args.vocab_path))
        vocab = WordVocab.load_vocab(PATH + args.vocab_path)
    _logger.info("Vocab Size:%s" % (len(vocab)))

    poss_vocab = build_poss_vocab(args.train_dataset, args.test_dataset)
    pkl.dump(poss_vocab, open(PATH + "/poss_vocab.p", 'wb'))
    poss_vocab = pkl.load(open(PATH + "/poss_vocab.p", 'rb'))

    args.num_layers = 1
    args.vocab_size = len(vocab)
    args.class_nums = len(intent_dict)
    args.poss_num = len(poss_vocab)

    # load pre-trained embeddings
    if args.use_pre_train_emb:
        if args.new_pre_vocab:
            pre_emb_cls = PreTrainVocab(args.pre_train_emb_path, args.pre_train_emb_size)
            vocab_emb = pre_emb_cls.getEmbeddimhArray(vocab)
            pkl.dump(vocab_emb, open('%s_vocab_emb.p' % args.task_name, 'wb'))
            args.vocab_emb = vocab_emb
        else:
            vocab_emb = pkl.load(open('%s_vocab_emb.p' % args.task_name, 'rb'))
            args.vocab_emb = vocab_emb
        _logger.info('load pre_train_emb finish emb_array size:%s' % (len(vocab_emb)))

    # build tfrecords
    if not os.path.exists(PATH + args.train_tfrecord_path) or not os.path.exists(
            PATH + args.test_tfrecord_path) or args.new_tfrecord:
        _logger.info('building tfrecords')
        DataSetTfrecord(args.train_dataset, vocab, args.seq_len,
                        intent_dict=intent_dict, poss_vocab=poss_vocab,
                        out_path=PATH + args.train_tfrecord_path)
        DataSetTfrecord(args.test_dataset, vocab, args.seq_len,
                        poss_vocab=poss_vocab, intent_dict=intent_dict,
                        out_path=PATH + args.test_tfrecord_path)

    _read_tfRecord = read_tfRecord(args.seq_len, args.batch_size)
    _logger.info("loading tfrecords")
    train_data_loader = _read_tfRecord(PATH + args.train_tfrecord_path)
    test_data_loader = _read_tfRecord(PATH + args.test_tfrecord_path)

    train_num = int([
        e for e in open(PATH + args.train_tfrecord_path + ".index", 'r', encoding='utf-8').readlines()
    ][0])
    test_num = int([
        e for e in open(PATH + args.test_tfrecord_path + ".index", 'r', encoding='utf-8').readlines()
    ][0])
    _logger.info('train_num:%s test_num:%s' % (train_num, test_num))
    args.train_num = train_num
    args.test_num = test_num
    _logger.info('%s batch_size:%s use_tfrecord:%s' % (args.model_name, args.batch_size, args.use_tfrecord))

    for index, e in enumerate(train_data_loader):
        if index % 10:
            print(e)

    # model selection
    if model_name == 'BaseLSTM':
        model = BaseLstm(args, 'BaseLstm')
    elif model_name == 'BaseLstmStruct':
        model = BaseLstmStruct(args, 'BaseLstmStruct')
    elif model_name == 'BaseTransformerStruct':
        model = BaseTransformerStruct(args, 'BaseTransformerStruct')
    elif model_name == 'cnn':
        model = Cnn(args, 'cnn')
    elif model_name == 'TransformerCNN':
        model = TransformerCNN(args, 'TransformerCNN')
    elif model_name == 'LEAM':
        model = LEAM(args, 'LEAM')

    args.model_name = model_name
    model.build_placeholder()
    model.build_model()
    model.build_accuracy()
    model.build_loss()
    model.build_op()

    if args.restore_model == '':
        model.train(train_data_loader, test_data_loader,
                    restore_model=None,
                    save_model=PATH + "/output/%s_%s_2kw.ckpt" % (model_name, args.task_name))
    else:
        model.train(train_data_loader, test_data_loader,
                    restore_model=PATH + args.restore_model,
                    save_model=PATH + "/output/%s_%s_2kw.ckpt" % (model_name, args.task_name))
def __init__(self, bert, vocab_path):
    # load the vocab used for tokenization
    self.vocab = WordVocab.load_vocab(vocab_path)
    # keep a handle to the pretrained BERT model
    self.bert = bert
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.max_sequence_length,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.max_sequence_length,
                               on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    splits = ['train', 'test']
    data_loaders = {'train': train_data_loader, 'test': test_data_loader}

    model = SentenceVAE(
        vocab_size=len(vocab),
        sos_idx=vocab.sos_index,
        eos_idx=vocab.eos_index,
        pad_idx=vocab.pad_index,
        unk_idx=vocab.unk_index,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path)
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=vocab.pad_index)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)
        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):
        for split in splits:
            data_loader = data_loaders[split]
            tracker = defaultdict(tensor)

            # enable/disable dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            correct = 0
            close = 0
            total = 0
            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # forward pass
                logp, mean, logv, z = model(batch['input'], batch['raw_length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['raw_length'],
                                                       mean, logv, args.anneal_function, step,
                                                       args.k, args.x0)
                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward pass + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                correct += logp.argmax(dim=1).eq(batch['target']).sum().item()
                close += torch.mul(logp.argmax(dim=1).ge(batch["target"] - 10),
                                   logp.argmax(dim=1).le(batch["target"] + 10)).sum().item()
                total += batch['target'].nelement()

                # bookkeeping
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1,)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                          % (split.upper(), iteration, len(data_loader) - 1, loss.item(),
                             NLL_loss.item() / batch_size, KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['raw'].data,
                                                        i2w=datasets['train'].get_i2w(),
                                                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f, acc %f, clo %f"
                  % (split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']),
                     correct / total, close / total))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
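# Minimal standalone sketch (not part of the original script) of how the KL
# weight from kl_anneal_function's 'logistic' branch evolves with the global
# step; the k, x0, and step values below are illustrative assumptions.
import numpy as np


def logistic_anneal(step, k=0.0025, x0=2500):
    # same formula as the 'logistic' branch above: a sigmoid centred at x0
    return float(1 / (1 + np.exp(-k * (step - x0))))


if __name__ == '__main__':
    for s in (0, 1000, 2500, 5000, 10000):
        # the weight rises from ~0 toward 1 as the step passes x0,
        # so the KL term is phased in gradually during training
        print(s, round(logistic_anneal(s), 4))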