def train():
    logging.info('Loading vocab, train and val datasets. Please wait a moment.')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]

    # build model
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()

    # load dataset
    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,
                          batch_size=args.batch_size,
                          shuffle=False)

    # loss function
    criterion = nn.BCELoss()

    # model info
    print(net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    t1 = time()
    for epoch in range(1, args.epochs + 1):
        for i, batch in enumerate(train_iter):
            features, targets, _, doc_lens = vocab.make_features(batch)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
            if args.debug:
                print('Batch ID:%d Loss:%f' % (i, loss.data[0]))
                continue
            if i % args.report_every == 0:
                cur_loss = eval(net, vocab, val_iter, criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info('Epoch: %2d Min_Val_Loss: %f Cur_Val_Loss: %f'
                             % (epoch, min_loss, cur_loss))
    t2 = time()
    logging.info('Total Cost:%f h' % ((t2 - t1) / 3600))
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(
        700, 100, 200)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    self.is_leaf_placeholder = tf.compat.v1.placeholder(
        tf.bool, (None), name='is_leaf_placeholder')
    self.left_children_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='left_children_placeholder')
    self.right_children_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='labels_placeholder')

    # add model variables
    with tf.compat.v1.variable_scope('Embeddings'):
        embeddings = tf.compat.v1.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.compat.v1.variable_scope('Composition'):
        W1 = tf.compat.v1.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        b1 = tf.compat.v1.get_variable('b1', [1, self.config.embed_size])
    with tf.compat.v1.variable_scope('Projection'):
        U = tf.compat.v1.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.compat.v1.get_variable('bs', [1, self.config.label_size])

    # build recursive graph
    tensor_array = tf.TensorArray(tf.float32,
                                  size=0,
                                  dynamic_size=True,
                                  clear_after_read=False,
                                  infer_shape=False)

    def embed_word(word_index):
        with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1)

    def loop_body(tensor_array, i):
        node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
        node_word_index = tf.gather(self.node_word_indices_placeholder, i)
        left_child = tf.gather(self.left_children_placeholder, i)
        right_child = tf.gather(self.right_children_placeholder, i)
        node_tensor = tf.cond(
            node_is_leaf,
            lambda: embed_word(node_word_index),
            lambda: combine_children(tensor_array.read(left_child),
                                     tensor_array.read(right_child)))
        tensor_array = tensor_array.write(i, node_tensor)
        i = tf.add(i, 1)
        return tensor_array, i

    loop_cond = lambda tensor_array, i: \
        tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
    self.tensor_array, _ = tf.while_loop(loop_cond,
                                         loop_body,
                                         [tensor_array, 0],
                                         parallel_iterations=1)

    # add projection layer
    self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    self.root_logits = tf.matmul(
        self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) +
                                            tf.nn.l2_loss(U))
    included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    self.full_loss = regularization_loss + tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.gather(self.logits, included_indices),
            labels=tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits,
            labels=self.labels_placeholder[-1:]))

    # add training op (use the compat.v1 optimizer, consistent with the rest of the graph)
    self.train_op = tf.compat.v1.train.GradientDescentOptimizer(
        self.config.lr).minimize(self.full_loss)
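# A minimal sketch (not part of the original snippet) of how the placeholders
# above might be fed: the tree is flattened so that every child's list index
# precedes its parent's, leaves carry a word index, and internal nodes carry
# the list positions of their children. Helper names (`node.isLeaf`,
# `node.left`, `node.right`, `node.word`, `node.label`, `self.vocab.encode`)
# are assumptions about the tree/vocab utilities, not confirmed API.
def build_feed_dict(self, root):
    nodes = []

    def collect(node):
        # post-order walk so children are appended before their parent
        if not node.isLeaf:
            collect(node.left)
            collect(node.right)
        nodes.append(node)

    collect(root)
    index = {node: i for i, node in enumerate(nodes)}
    return {
        self.is_leaf_placeholder: [n.isLeaf for n in nodes],
        self.left_children_placeholder: [
            index[n.left] if not n.isLeaf else -1 for n in nodes],
        self.right_children_placeholder: [
            index[n.right] if not n.isLeaf else -1 for n in nodes],
        self.node_word_indices_placeholder: [
            self.vocab.encode(n.word) if n.isLeaf else -1 for n in nodes],
        self.labels_placeholder: [n.label for n in nodes],
    }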
def test():
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.test_dir) as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)

    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir, map_location='cuda:0')
    else:
        checkpoint = torch.load(args.load_dir,
                                map_location=lambda storage, loc: storage)

    # checkpoint['args'].device saves the device used at train time;
    # if we are testing on a CPU, we must override device to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(models, checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()

    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(test_iter):
        features, _, summaries, doc_lens = vocab.make_features(batch)
        t1 = time()
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        t2 = time()
        time_cost += t2 - t1
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            topk = min(args.topk, doc_len)
            try:
                topk_indices = prob.topk(topk)[1].cpu().data.numpy()
            except Exception:
                continue
            topk_indices.sort()
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            ref = summaries[doc_id]
            with open(os.path.join(args.ref, str(file_id) + '.txt'), 'w') as f:
                f.write(ref)
            with open(os.path.join(args.hyp, str(file_id) + '.txt'), 'w') as f:
                f.write('\n'.join(hyp))
            start = stop
            file_id = file_id + 1
    print('Speed: %.2f docs / s' % (doc_num / time_cost))
def train():
    # command: python main.py -device -1 -batch_size 32 -model RNN_RNN -seed 1 -save_dir checkpoints/XXX.pt
    logging.info('Loading vocab, train and val datasets. Please wait a moment.')

    # embedding ---> load, default='data/embedding.npz'
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    # word2id ---> load, default='data/word2id.json'
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    # load training dataset
    # train_dir ---> load, default='data/train.json'
    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    # load validation dataset
    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)  # number of embeddings (default ---> 100)
    args.embed_dim = embed.size(1)  # size of each embedding (default ---> 100)
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]  # default kernel sizes for pooling ---> {3,4,5}

    # build model
    net = getattr(models, args.model)(args, embed)  # get model class and pass the embedding

    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,
                          batch_size=args.batch_size,
                          shuffle=False)

    # loss function
    # Creates a criterion that measures the Binary Cross Entropy between the target and the output
    criterion = nn.BCELoss()

    # model info
    print("model info is: ", net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    t1 = time()
    for epoch in range(1, args.epochs + 1):
        for i, batch in enumerate(train_iter):
            features, targets, _, doc_lens = vocab.make_features(batch)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            else:
                print("no gpu")
            probs = net(features, doc_lens)
            # criterion(predicted, target)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(net.parameters(), args.max_norm)
            optimizer.step()
            if args.debug:
                print('Batch ID:%d Loss:%f' % (i, loss.data[0]))
                continue
            if i % args.report_every == 0:
                cur_loss = eval(net, vocab, val_iter, criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info('Epoch: %2d Min_Val_Loss: %f Cur_Val_Loss: %f'
                             % (epoch, min_loss, cur_loss))
    t2 = time()
    logging.info('Total Cost:%f h' % ((t2 - t1) / 3600))
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(
        700, 100, 200)
    # print("data ", self.train_data)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    cluster = tf.train.ClusterSpec(
        {"local": ["localhost:2222", "localhost:2223"]})

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(tf.int32, (None),
                                              name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(tf.int32, (None),
                                             name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(tf.int32, (None), name='cons')

    # add model variables
    # making initialization deterministic for now
    # initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    def embed_word(word_index, embeddings):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    def find_loss(node_tensor, i, labels, U, bs):
        # add projection layer
        node_logits = tf.matmul(node_tensor, U) + bs
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=node_logits, labels=labels[i:i + 1])
        return loss

    def base_case(node_word_indices, i, embeddings, labels, U, bs):
        word_index = tf.gather(node_word_indices, i)
        node_tensor = embed_word(word_index, embeddings)
        loss = find_loss(node_tensor, i, labels, U, bs)
        return [node_tensor, loss]

    def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U,
                 bs):
        with tf.device("/job:local/replica:0/task:0/device:CPU:0"):
            left_node, left_loss = rec(i * 2, is_leaf, node_word_indices,
                                       embeddings, W, b, labels, U, bs)
            right_node, right_loss = rec(i * 2 + 1, is_leaf,
                                         node_word_indices, embeddings, W, b,
                                         labels, U, bs)
        with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
            node_tensor = combine_children(left_node, right_node, W, b)
            node_loss = find_loss(node_tensor, i, labels, U, bs)
            loss = tf.concat([left_loss, node_loss, right_loss], 0)
        return [node_tensor, loss]

    # Function Declaration
    rec = function.Declare("Rec",
                           [("i", tf.int32), ("is_leaf", tf.int32),
                            ("node_word_indices", tf.int32),
                            ("embeddings", tf.float32), ("W", tf.float32),
                            ("b", tf.float32), ("labels", tf.int32),
                            ("U", tf.float32), ("bs", tf.float32)],
                           [("ret", tf.float32), ("ret1", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, tf.int32, tf.float32, tf.float32,
                    func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret", "ret1"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U,
                bs):
        node_tensor, loss = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: base_case(node_word_indices, i, embeddings,
                                      labels, U, bs),
                    lambda: rec_case(i, is_leaf, node_word_indices,
                                     embeddings, W, b, labels, U, bs))
        return [node_tensor, loss]

    RecImpl.add_to_graph(tf.get_default_graph())

    self.node_tensor, self.full_loss = rec(
        self.cons_placeholder, self.is_leaf_placeholder,
        self.node_word_indices_placeholder, self.embeddings, W1, b1,
        self.labels_placeholder, U, bs)

    # add projection layer
    self.root_logits = tf.matmul(self.node_tensor, U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
        l1 = tf.nn.l2_loss(W1)
    with tf.device("/job:local/replica:0/task:0/device:CPU:0"):
        l2 = tf.nn.l2_loss(U)
    l = l1 + l2
    regularization_loss = self.config.l2 * l
    with tf.device("/job:local/replica:0/task:1/device:CPU:0"):
        x = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[1:2])
        self.root_loss = regularization_loss + tf.reduce_sum(x)

    # add training op
    self.full_loss = tf.reduce_sum(self.full_loss)
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.full_loss)
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(
        700, 100, 200)
    self.config.max_tree_nodes = tree.get_max_tree_nodes(
        self.train_data + self.dev_data + self.test_data)
    print(self.config.max_tree_nodes)
    # print("data ", self.train_data)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    dim1 = self.config.batch_size
    dim2 = self.config.max_tree_nodes
    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, [dim1, dim2], name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='labels_placeholder')
    self.tree_size_placeholder = tf.placeholder(
        tf.int32, [dim1], name='tree_size_placeholder')

    # add model variables
    # making initialization deterministic for now
    # initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        self.bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    outloss = []
    prediction = []
    root_loss = []
    for idx_batch in range(self.config.batch_size):
        self.root_prediction, self.full_loss, self.root_loss = \
            self.compute_tree(idx_batch)
        prediction.append(self.root_prediction)
        outloss.append(self.full_loss)
        root_loss.append(self.root_loss)

    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)
    self.rloss = tf.stack(root_loss)

    # Compute batch loss
    self.total_loss = tf.reduce_mean(batch_loss)

    # Add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.total_loss)
def _build_vocab(self):
    self.vocab = utils.Vocab(cfg.vocab_size)
    vp = cfg.vocab_path_train \
        if cfg.mode == 'train' or cfg.vocab_path_eval is None \
        else cfg.vocab_path_eval
    # vp = cfg.vocab_path + '.json.freq.json'
    self.vocab.load_vocab(vp)
    return self.vocab.vocab_size
def test():
    logging.info('Loading vocab and test dataset. Please wait a moment.')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id, encoding='utf-8') as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)

    # build dataset and model
    with open(args.test_dir, encoding='utf-8') as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)
    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir,
                                map_location=lambda storage, loc: storage)

    # checkpoint['args'].device saves the device used at train time;
    # if we are testing on a CPU, we must override device to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(model, checkpoint['args'].model)(checkpoint['args'], embed)
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()

    print('running test!')
    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    count = 0
    for batch in tqdm(test_iter):
        count += 1
        features, targets, summaries, doc_lens = vocab.make_features(batch)
        t1 = time()
        if use_gpu:
            features = features.cuda()
            targets = targets.cuda()
        probs = net(features, doc_lens)
        # probs = probs.to('cpu')
        # y_pred = np.where(probs >= 0.6, 1, 0)
        # y_true = targets
        # accuracy += accuracy_score(y_true, y_pred)
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            # print(prob)
            # label = labels[start:stop]
            # prob_n = prob.cpu().data.numpy()
            topk = min(args.topk, doc_len)
            topk_indices = prob.topk(topk)[1].to('cpu')
            # Tensor.sort() is not in-place, so keep its return value;
            # sorting preserves the original sentence order in the summary
            topk_indices = topk_indices.sort()[0]
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            ref = summaries[doc_id]
            with open(os.path.join(args.ref, str(file_id) + '.txt'),
                      'w', encoding='utf-8') as f:
                f.write(ref)
            with open(os.path.join(args.hyp, str(file_id) + '.txt'),
                      'w', encoding='utf-8') as f:
                f.write('.\n'.join(hyp))
            start = stop
            file_id = file_id + 1
        t2 = time()
        time_cost += t2 - t1
    print('Speed: %.2f docs / s' % (doc_num / time_cost))
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(
        700, 100, 200)
    # print("data ", self.train_data)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(
        tf.int32, (None), name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, (None), name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(
        tf.int32, (None), name='cons')

    # add model variables
    # making initialization deterministic for now
    initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
        # initializer=initializer)  # tf.constant_initializer(2.0)
    with tf.variable_scope('Composition'):
        W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        # initializer=initializer)  # tf.constant_initializer(0.0)
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
        # initializer=initializer)  # tf.constant_initializer(0.0)
    with tf.variable_scope('Projection'):
        U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        # initializer=initializer)  # tf.constant_initializer(0.0)
        bs = tf.get_variable('bs', [1, self.config.label_size])
        # initializer=initializer)  # tf.constant_initializer(0.0)

    # Build recursive graph
    # tensor_array = tf.TensorArray(
    #     tf.float32,
    #     size=0,
    #     dynamic_size=True,
    #     clear_after_read=False,
    #     infer_shape=False)

    def embed_word(word_index, embeddings):
        # with tf.device('/cpu:0'):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    # Function Declaration
    rec = function.Declare("Rec",
                           [("i", tf.int32), ("is_leaf", tf.int32),
                            ("node_word_indices", tf.int32),
                            ("embeddings", tf.float32), ("W", tf.float32),
                            ("b", tf.float32)],
                           [("ret", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b):
        node_word_index = tf.gather(node_word_indices, i)
        node_tensor = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: embed_word(node_word_index, embeddings),
                    lambda: combine_children(
                        rec(i * 2, is_leaf, node_word_indices, embeddings, W, b),
                        rec(i * 2 + 1, is_leaf, node_word_indices, embeddings, W, b),
                        W, b))
        return node_tensor

    RecImpl.add_to_graph(tf.get_default_graph())

    self.node_tensor = rec(self.cons_placeholder, self.is_leaf_placeholder,
                           self.node_word_indices_placeholder,
                           self.embeddings, W1, b1)

    # add projection layer
    # self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    # 1x35 * 35x35 + 1x35 -> 1x35 projection
    self.root_logits = tf.matmul(self.node_tensor, U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    # regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
    # included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    # self.full_loss = regularization_loss + tf.reduce_sum(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(
    #         logits=tf.gather(self.logits, included_indices),
    #         labels=tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[1:2]))

    # add training op
    self.train_op = tf.train.GradientDescentOptimizer(
        self.config.lr).minimize(self.root_loss)
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data, self.real_test = \
        tree.simplified_data(4000, 500, 500)

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, (None), name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, (None), name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, (None), name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, (None), name='labels_placeholder')

    self.vocab = utils.Vocab()
    data = self.train_data
    train_sents = [t.get_words() for t in data]
    vocab_size = self.vocab.construct(
        list(itertools.chain.from_iterable(train_sents)))

    '''
    def loadGloveModel(gloveFile):
        print("Loading Glove Model")
        f = open(gloveFile, 'r')
        words = []
        embeddings = []
        for line in f:
            splitLine = line.split()
            word = splitLine[0]
            embedding = [float(val) for val in splitLine[1:]]
            words.append(word)
            embeddings.append(embedding)
        print("Done.", len(words), " words loaded!")
        return words, embeddings

    self.glove_words, self.embeddings = loadGloveModel("filtered_glove_300.txt")
    num = 0
    embed = np.zeros((vocab_size, self.config.embed_size), dtype='f')
    for i in range(vocab_size):
        word = self.vocab.decode(i)
        if word in self.glove_words:
            indx = self.glove_words.index(word)
            num += 1
            np.append(embed, self.embeddings[indx])
        else:
            np.append(embed, np.random.uniform(-0.1, 0.1, self.config.embed_size))
    print(num)
    '''

    with tf.variable_scope('Embeddings'):
        embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
        # embeddings = tf.get_variable('embeddings', initializer=embed, trainable=True)

    # initialize the composition weights so that each child contributes half of itself
    a = np.zeros((self.config.embed_size, self.config.embed_size), dtype='f')
    np.fill_diagonal(a, 0.5)
    b = np.zeros((2 * self.config.embed_size, self.config.embed_size), dtype='f')
    b = np.vstack((a, a))
    print(b)

    with tf.variable_scope('Composition'):
        W1 = tf.Variable(b, name='W1', dtype=tf.float32)
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.get_variable('bs', [1, self.config.label_size])

    # build recursive graph
    tensor_array = tf.TensorArray(tf.float32,
                                  size=0,
                                  dynamic_size=True,
                                  clear_after_read=False,
                                  infer_shape=False)

    def embed_word(word_index):
        with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(
            tf.matmul(tf.concat(1, [left_tensor, right_tensor]), W1) + b1)

    def loop_body(tensor_array, i):
        node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
        node_word_index = tf.gather(self.node_word_indices_placeholder, i)
        left_child = tf.gather(self.left_children_placeholder, i)
        right_child = tf.gather(self.right_children_placeholder, i)
        node_tensor = tf.cond(
            node_is_leaf,
            lambda: embed_word(node_word_index),
            lambda: combine_children(tensor_array.read(left_child),
                                     tensor_array.read(right_child)))
        tensor_array = tensor_array.write(i, node_tensor)
        i = tf.add(i, 1)
        return tensor_array, i

    loop_cond = lambda tensor_array, i: \
        tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
    self.tensor_array, _ = tf.while_loop(loop_cond,
                                         loop_body,
                                         [tensor_array, 0],
                                         parallel_iterations=1)

    # add projection layer
    self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    self.root_logits = tf.matmul(
        self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) +
                                            tf.nn.l2_loss(U))
    included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    # self.full_loss = regularization_loss + tf.reduce_sum(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(
    #         tf.gather(self.logits, included_indices),
    #         tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            self.root_logits, self.labels_placeholder[-1:]))

    # add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.root_loss)
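# A minimal sketch (not part of the original snippet) of driving a graph like
# the one above for a single tree. `model`, `some_tree`, and the
# `build_feed_dict` helper are hypothetical names for illustration; depending
# on the TensorFlow 0.x/1.x version, the variable initializer is
# tf.global_variables_initializer() or tf.initialize_all_variables().
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # flatten one tree into the placeholders and take one optimizer step
    feed = model.build_feed_dict(some_tree)
    _, loss = sess.run([model.train_op, model.root_loss], feed_dict=feed)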
def train():
    print('Loading vocab, train and valid dataset...')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    train_data = []
    fns = os.listdir(args.train_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()

    val_data = []
    fns = os.listdir(args.valid_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()

    net = getattr(model, args.model)(args, embed)
    loss1 = getattr(model, 'hinge_loss_1')()
    loss2 = getattr(model, 'myLoss2')()  # losses for the SRL and sentence-scoring stages
    if use_cuda:
        net.cuda()
        loss1.cuda()
        loss2.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # train the SRL scorer
    print('Begin train SRL predictor...')
    for epoch in range(1, args.srl_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, \
                event_tfs, event_prs, event_lens, event_sent_lens, _1, _2 = \
                vocab.make_tensors(blog, args)
            if use_cuda:
                sents = sents.cuda()
                events = events.cuda()
                event_targets = event_targets.cuda()
                event_tfs = event_tfs.cuda()
            event_probs = net(sents, doc_lens, events, event_lens,
                              event_sent_lens, event_tfs, True)
            loss = loss1(event_probs, event_targets)
            optimizer.zero_grad()
            if loss.data.item() > 1e-10:
                loss.backward()
                clip_grad_norm_(net.parameters(), args.max_norm)
                optimizer.step()
            print('SRL EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.srl_epochs, i, len(train_data), loss))
            cnt = (epoch - 1) * len(train_data) + i
            if cnt % args.valid_every == 0 and cnt / args.valid_every >= 0:
                print('Begin SRL valid...Epoch %d, Batch %d' % (epoch, i))
                p_5, p_10, p_20, mse = evaluate_srl(net, loss1, vocab, val_data)
                save_path = args.save_dir + args.model + \
                    '_SRL_%d_%.4f_%.4f_%.4f_%.4f' % (
                        cnt / args.valid_every, p_5, p_10, p_20, mse)
                net.save(save_path)
                print('Epoch: %2d Loss: %f' % (epoch, loss))
        adjust_learning_rate(optimizer, epoch)
    """
def main():
    utils.print_config(args)
    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = True if args.use_pretrain == 'True' else False
    args.use_aux_task = True if args.use_aux_task == 'True' else False

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  # model_path default="data/log/{}"
    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            print(args.model_path)
            raise ValueError
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'), 'w',
              encoding='utf8') as f:
        json.dump(vars(args), f)
    print("Default models path: {}".format(args.model_path))

    print('code start / {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()

    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi':
            args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            # backward model
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)
            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())
            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps,
                                                     bw_model.saver, bw_sess)
            # forward model
            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver,
                                                fw_sess)
        else:
            model = BaseModel(vocab, modelhps)
    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('models load end')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time
        if args.model == 'mmi_bidi':
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'),
                args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab,
                              model.hps.data_path.replace('train_', 'test_'),
                              args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
def __init__(self, vocab_file_path=None, model_file_path=None):
    """
    :param vocab_file_path: tuple of code vocab, ast vocab, nl vocab,
        if given, build vocab by given path
    :param model_file_path:
    """
    # dataset
    self.train_dataset = data.CodePtrDataset(code_path=config.train_code_path,
                                             ast_path=config.train_sbt_path,
                                             nl_path=config.train_nl_path)
    self.train_dataset_size = len(self.train_dataset)
    self.train_dataloader = DataLoader(
        dataset=self.train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        collate_fn=lambda *args: utils.unsort_collate_fn(
            args,
            code_vocab=self.code_vocab,
            ast_vocab=self.ast_vocab,
            nl_vocab=self.nl_vocab))

    # vocab
    self.code_vocab: utils.Vocab
    self.ast_vocab: utils.Vocab
    self.nl_vocab: utils.Vocab

    # load vocab from given path
    if vocab_file_path:
        code_vocab_path, ast_vocab_path, nl_vocab_path = vocab_file_path
        self.code_vocab = utils.load_vocab_pk(code_vocab_path)
        self.ast_vocab = utils.load_vocab_pk(ast_vocab_path)
        self.nl_vocab = utils.load_vocab_pk(nl_vocab_path)
    # new vocab
    else:
        self.code_vocab = utils.Vocab('code_vocab')
        self.ast_vocab = utils.Vocab('ast_vocab')
        self.nl_vocab = utils.Vocab('nl_vocab')
        codes, asts, nls = self.train_dataset.get_dataset()
        for code, ast, nl in zip(codes, asts, nls):
            self.code_vocab.add_sentence(code)
            self.ast_vocab.add_sentence(ast)
            self.nl_vocab.add_sentence(nl)

        self.origin_code_vocab_size = len(self.code_vocab)
        self.origin_nl_vocab_size = len(self.nl_vocab)

        # trim vocabulary
        self.code_vocab.trim(config.code_vocab_size)
        self.nl_vocab.trim(config.nl_vocab_size)
        # save vocabulary
        self.code_vocab.save(config.code_vocab_path)
        self.ast_vocab.save(config.ast_vocab_path)
        self.nl_vocab.save(config.nl_vocab_path)
        self.code_vocab.save_txt(config.code_vocab_txt_path)
        self.ast_vocab.save_txt(config.ast_vocab_txt_path)
        self.nl_vocab.save_txt(config.nl_vocab_txt_path)

    self.code_vocab_size = len(self.code_vocab)
    self.ast_vocab_size = len(self.ast_vocab)
    self.nl_vocab_size = len(self.nl_vocab)

    # model
    self.model = models.Model(code_vocab_size=self.code_vocab_size,
                              ast_vocab_size=self.ast_vocab_size,
                              nl_vocab_size=self.nl_vocab_size,
                              model_file_path=model_file_path)
    self.params = list(self.model.code_encoder.parameters()) + \
        list(self.model.ast_encoder.parameters()) + \
        list(self.model.reduce_hidden.parameters()) + \
        list(self.model.decoder.parameters())

    # optimizer
    self.optimizer = Adam([
        {'params': self.model.code_encoder.parameters(), 'lr': config.code_encoder_lr},
        {'params': self.model.ast_encoder.parameters(), 'lr': config.ast_encoder_lr},
        {'params': self.model.reduce_hidden.parameters(), 'lr': config.reduce_hidden_lr},
        {'params': self.model.decoder.parameters(), 'lr': config.decoder_lr},
    ], betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    if config.use_lr_decay:
        self.lr_scheduler = lr_scheduler.StepLR(self.optimizer,
                                                step_size=config.lr_decay_every,
                                                gamma=config.lr_decay_rate)

    # best score and model (state dict)
    self.min_loss: float = 1000
    self.best_model: dict = {}
    self.best_epoch_batch: (int, int) = (None, None)

    # eval instance
    self.eval_instance = eval.Eval(self.get_cur_state_dict())

    # early stopping
    self.early_stopping = None
    if config.use_early_stopping:
        self.early_stopping = utils.EarlyStopping()

    config.model_dir = os.path.join(config.model_dir, utils.get_timestamp())
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(
        700, 100, 200)
    max_height = tree.get_max_tree_height(self.train_data + self.dev_data +
                                          self.test_data)
    self.config.max_tree_height = pow(2, max_height + 1)
    print(self.config.max_tree_height)
    # print("data ", self.train_data)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    dim1 = self.config.batch_size
    dim2 = self.config.max_tree_height
    self.is_leaf_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(
        tf.int32, (None), name='cons')

    # add model variables
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        self.bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    def embed_word(word_index, embeddings):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    def find_loss(node_tensor, i, labels, U, bs):
        # add projection layer
        node_logits = tf.matmul(node_tensor, U) + bs
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=node_logits, labels=labels[i:i + 1])
        return loss

    def base_case(node_word_indices, i, embeddings, labels, U, bs):
        word_index = tf.gather(node_word_indices, i)
        node_tensor = embed_word(word_index, embeddings)
        loss = find_loss(node_tensor, i, labels, U, bs)
        return [node_tensor, loss]

    def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U,
                 bs):
        left_node, left_loss = self.rec(i * 2, is_leaf, node_word_indices,
                                        embeddings, W, b, labels, U, bs)
        right_node, right_loss = self.rec(i * 2 + 1, is_leaf,
                                          node_word_indices, embeddings, W, b,
                                          labels, U, bs)
        node_tensor = combine_children(left_node, right_node, W, b)
        node_loss = find_loss(node_tensor, i, labels, U, bs)
        loss = tf.concat([left_loss, node_loss, right_loss], 0)
        return [node_tensor, loss]

    # Function Declaration
    self.rec = function.Declare("Rec",
                                [("i", tf.int32), ("is_leaf", tf.int32),
                                 ("node_word_indices", tf.int32),
                                 ("embeddings", tf.float32),
                                 ("W", tf.float32), ("b", tf.float32),
                                 ("labels", tf.int32), ("U", tf.float32),
                                 ("bs", tf.float32)],
                                [("ret", tf.float32), ("ret1", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, tf.int32, tf.float32, tf.float32,
                    func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret", "ret1"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U,
                bs):
        node_tensor, loss = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: base_case(node_word_indices, i, embeddings,
                                      labels, U, bs),
                    lambda: rec_case(i, is_leaf, node_word_indices,
                                     embeddings, W, b, labels, U, bs))
        return [node_tensor, loss]

    RecImpl.add_to_graph(tf.get_default_graph())

    outloss = []
    prediction = []
    root_loss = []
    for idx_batch in range(self.config.batch_size):
        self.root_prediction, self.full_loss, self.root_loss = \
            self.compute_tree(idx_batch)
        prediction.append(self.root_prediction)
        outloss.append(self.full_loss)
        root_loss.append(self.root_loss)

    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)
    self.rloss = tf.stack(root_loss)

    # Compute batch loss
    # reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # regpart = tf.add_n(reg_losses)
    # loss = tf.reduce_mean(batch_loss)
    # self.total_loss = loss + 0.5 * regpart
    self.total_loss = tf.reduce_mean(batch_loss)

    # Add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.total_loss)
def train():
    print('Loading vocab, train and valid dataset...')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    train_data = []
    fns = os.listdir(args.train_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()

    val_data = []
    fns = os.listdir(args.valid_dir)
    fns.sort()
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()

    net = getattr(model, args.model)(args, embed)
    myloss = nn.MSELoss()
    if use_cuda:
        net.cuda()
        myloss.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # train the SRL scorer
    print('Begin train SRL predictor...')
    for epoch in range(1, args.srl_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, \
                event_tfs, event_prs, event_lens, event_sent_lens, _1, _2 = \
                vocab.make_tensors(blog, args)
            if use_cuda:
                sents = sents.cuda()
                events = events.cuda()
                event_targets = event_targets.cuda()
                event_tfs = event_tfs.cuda()
            event_probs = net(sents, doc_lens, events, event_lens, event_tfs,
                              True)
            loss = myloss(event_probs, event_targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()
            print('SRL EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.srl_epochs, i, len(train_data), loss))
        adjust_learning_rate(optimizer, epoch)

    train_srl_score, valid_srl_score, loss1, loss2 = srl_predict(
        net, myloss, vocab, train_data, val_data)
    print('SRL predict loss: train: %f valid: %f' % (loss1, loss2))

    # train the sentence scorer
    print('Begin train Sent predictor...')
    adjust_learning_rate(optimizer, 0)
    for epoch in range(1, args.sent_epochs + 1):
        for i, blog in enumerate(train_data):
            sents, sent_targets, doc_lens, doc_targets, events, event_targets, \
                event_tfs, event_prs, event_lens, event_sent_lens, _1, _2 = \
                vocab.make_tensors(blog, args)
            event_scores = train_srl_score[i]
            if use_cuda:
                sents = sents.cuda()
                sent_targets = sent_targets.cuda()
                events = events.cuda()
                event_scores = event_scores.cuda()
            sent_probs = net(sents, doc_lens, events, event_lens,
                             event_scores, False)
            loss = myloss(sent_probs, sent_targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()
            print('SENT EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                  (epoch, args.sent_epochs, i, len(train_data), loss))
            cnt = (epoch - 1) * len(train_data) + i
            if cnt % args.valid_every == 0 and cnt / args.valid_every > 0:
                print('Begin valid... Epoch %d, Batch %d' % (epoch, i))
                cur_loss, r1, r2, rl, rsu = evaluate(net, myloss, vocab,
                                                     val_data,
                                                     valid_srl_score, True)
                save_path = args.save_dir + args.model + \
                    '_SENT_%d_%.4f_%.4f_%.4f_%.4f_%.4f' % (
                        cnt / args.valid_every, cur_loss, r1, r2, rl, rsu)
                net.save(save_path)
                print('Epoch: %2d Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f Rouge-SU4: %f'
                      % (epoch, cur_loss, r1, r2, rl, rsu))
        adjust_learning_rate(optimizer, epoch)
def train():
    logging.info('Loading vocab, train and val dataset...')
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.train_dir) as f:
        examples = [json.loads(line) for line in f]
    train_dataset = utils.Dataset(examples)

    with open(args.val_dir) as f:
        examples = [json.loads(line) for line in f]
    val_dataset = utils.Dataset(examples)

    # update args
    args.embed_num = embed.size(0)
    args.embed_dim = embed.size(1)
    args.kernel_sizes = [int(ks) for ks in args.kernel_sizes.split(',')]  # for CNN_RNN

    # build model
    net = getattr(models, args.model)(args, embed)
    if use_gpu:
        net.cuda()

    # load dataset
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(dataset=val_dataset,
                              batch_size=args.batch_size,
                              shuffle=False)

    # loss function
    criterion = nn.BCELoss()

    # model info
    print(net)
    params = sum(p.numel() for p in list(net.parameters())) / 1e6
    print('#Params: %.1fM' % (params))

    min_loss = float('inf')
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    net.train()

    # TensorBoard
    writer = SummaryWriter(f'runs/{args.model}')

    t1 = time()
    for epoch in tqdm(range(1, args.epochs + 1), desc='Epoch', position=0):
        for i, batch in enumerate(tqdm(train_loader, desc='Train', position=1)):
            features, targets, _, doc_lens = vocab.make_features(batch)
            features, targets = Variable(features), Variable(targets.float())
            if use_gpu:
                features = features.cuda()
                targets = targets.cuda()
            probs = net(features, doc_lens)
            loss = criterion(probs, targets)
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optimizer.step()

            # TensorBoard
            train_acc = accuracy(probs, targets)
            writer.add_scalar('train_loss_batch', loss,
                              epoch * len(train_loader) + i)
            writer.add_scalar('train_acc_batch', train_acc,
                              epoch * len(train_loader) + i)

            if args.debug:
                print(f'Batch ID: {i}, Loss: {loss.item()}, Acc: {train_acc}')
                continue
            if i % args.report_every == 0:
                cur_loss, cur_acc = eval(net, vocab, valid_loader, criterion)
                if cur_loss < min_loss:
                    min_loss = cur_loss
                    best_path = net.save()
                logging.info(
                    f'Epoch: {epoch}, Min_Val_Loss: {min_loss}, '
                    f'Cur_Val_Loss: {cur_loss}, Cur_Val_Acc: {cur_acc}')

                # TensorBoard
                writer.add_scalar('valid_loss', cur_loss, epoch)
                writer.add_scalar('valid_acc', cur_acc, epoch)
    t2 = time()
    logging.info('Total Time:%f h' % ((t2 - t1) / 3600))
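# `accuracy` is called in the training loop above but not defined in this
# snippet; a minimal sketch under the assumption that sentence probabilities
# are thresholded at 0.5 against binary extraction targets.
def accuracy(probs, targets, threshold=0.5):
    # fraction of sentences whose thresholded prediction matches the target
    preds = (probs >= threshold).float()
    return (preds == targets).float().mean().item()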