def main():
    if args.train:
        for t in range(model.checkpoint, args.num_epochs):
            if t + 1 <= args.num_epochs_all_nodes:
                train(t + 1, get_batches(data_train_all_nodes, args.batch_size), 'train')
            else:
                train(t + 1, get_batches(data_train, args.batch_size), 'train')
            train(t + 1, dev_batches, 'dev')
            train(t + 1, test_batches, 'test')
    elif args.oracle:
        oracle(args, model, ptb, data_test, 'test')
    else:
        if args.robust:
            for i in range(args.num_epochs):
                eps_scheduler.step_epoch(verbose=False)
            res = []
            for i in range(1, args.budget + 1):
                logger.info('budget {}'.format(i))
                ptb.budget = i
                acc_rob = train(None, test_batches, 'test')
                res.append(acc_rob)
            logger.info('Verification results:')
            for i in range(len(res)):
                logger.info('budget {} acc_rob {:.3f}'.format(i + 1, res[i]))
            logger.info(res)
        else:
            train(None, test_batches, 'test')
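# Every snippet in this collection relies on some variant of a get_batches()
# helper, with signatures that differ per project. As a point of reference, a
# minimal sketch of the simplest variant used above (shuffle, then chunk) might
# look like the following -- this is an assumption, not any project's actual code.
# It returns a list rather than a generator because the training loops below
# call len() on the result.
import random

def get_batches(data, batch_size):
    """Shuffle `data` and split it into chunks of at most `batch_size` items."""
    data = list(data)
    random.shuffle(data)
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]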
def train(epoch):
    model.train()
    # Load data for an epoch.
    train_batches = get_batches(data_train, args.batch_size)
    for a in avg:
        a.reset()
    eps_inc_per_step = 1.0 / (args.num_epochs_warmup * len(train_batches))
    for i, batch in enumerate(train_batches):
        # We increase eps linearly every batch until it reaches args.eps.
        eps = args.eps * min(
            eps_inc_per_step * ((epoch - 1) * len(train_batches) + i + 1), 1.0)
        # Run the main training step (the backward pass is assumed to happen
        # inside step()).
        acc, acc_robust, loss = res = step(model, ptb, batch, eps=eps, train=True)
        # Optimize the loss.
        torch.nn.utils.clip_grad_norm_(model.core.parameters(), 5.0)
        optimizer.step()
        optimizer.zero_grad()
        # Update and print training statistics.
        for k in range(3):
            avg[k].update(res[k], len(batch))
        if (i + 1) % args.log_interval == 0:
            logger.info(
                "Epoch {}, training step {}/{}: acc {:.3f}, robust acc {:.3f}, loss {:.3f}, eps {:.3f}"
                .format(epoch, i + 1, len(train_batches),
                        avg_acc.avg, avg_acc_robust.avg, avg_loss.avg, eps))
    model.save(epoch)
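# The loop above assumes `avg` is a list of three running-average meters, with
# avg_acc, avg_acc_robust and avg_loss aliasing its entries. A minimal sketch
# of such a meter (the class and variable names here are assumptions):
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        # Accumulate a value weighted by the number of items n it covers.
        self.sum += float(val) * n
        self.count += n
        self.avg = self.sum / self.count

avg_acc, avg_acc_robust, avg_loss = AverageMeter(), AverageMeter(), AverageMeter()
avg = [avg_acc, avg_acc_robust, avg_loss]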
def main(_):
    data_path = 'data/new-dataset-cornell-length10-filter1-vocabSize40000.pkl'
    word2id, id2word, trainingSamples = load_dataset(data_path)
    hparam = Config()
    with tf.Session() as sess:
        model = Seq2SeqModel(hparam, word2id)
        ckpt = tf.train.get_checkpoint_state(hparam.save_path)
        if FLAGS.resume and ckpt and tf.train.checkpoint_exists(
                ckpt.model_checkpoint_path):
            print("Restoring model parameters from %s." % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(model.init)
        train_writer = tf.summary.FileWriter(hparam.save_path, graph=sess.graph)
        for epoch in range(hparam.num_epoch):
            print("Starting Epoch {}/{}:".format(epoch, hparam.num_epoch))
            batches = get_batches(trainingSamples, hparam.batch_size)
            total_loss = 0.0
            total_count = 0
            for nextBatch in tqdm(batches, desc="training"):
                outputs = model.train_session(sess, nextBatch)
                loss = outputs["loss"]
                summary = outputs["summary"]
                step = outputs["step"]
                train_writer.add_summary(summary, step)
                total_loss += loss
                total_count += 1
                if step % hparam.display_per_step == 0:
                    # Guard against overflow in exp() for large losses.
                    perplexity = math.exp(float(total_loss / total_count)) \
                        if total_loss / total_count < 300 else float('inf')
                    tqdm.write(" Step %d | Per-word Loss %.4f | Perplexity %.4f"
                               % (step, total_loss / total_count, perplexity))
                    checkpoint_path = os.path.join(hparam.save_path, hparam.model_name)
                    model.saver.save(sess, checkpoint_path)
            tqdm.write("\n")
            # Recompute perplexity from the full-epoch average so the summary
            # does not reuse a stale (or undefined) value from the last display step.
            perplexity = math.exp(float(total_loss / total_count)) \
                if total_loss / total_count < 300 else float('inf')
            tqdm.write(" Epoch %d | Per-word Loss %.4f | Perplexity %.4f"
                       % (epoch, total_loss / total_count, perplexity))
            tqdm.write("\n")
def fit(self, train_data, val_data):
    saver = tf.train.Saver(tf.global_variables())
    self.sess = tf.Session()
    self.sess.run(self.init_op)
    self.add_summary(self.sess)
    # Use a single fixed validation batch for the periodic evaluation below.
    (X_val_batch, Y_val_batch, X_val_batch_lens, Y_val_batch_lens) = \
        next(get_batches(val_data, self.batch_size, self.vocab, self.tag2label))
    for epoch in range(1, self.epoch_num + 1):
        for local_step, (X_train_batch, Y_train_batch, X_train_batch_lens,
                         Y_train_batch_lens) in enumerate(
                get_batches(train_data, self.batch_size, self.vocab, self.tag2label)):
            _, loss, summary, step_num = self.sess.run(
                [self.train_op, self.loss, self.merged, self.global_step],
                {self.word_ids: X_train_batch,
                 self.labels: Y_train_batch,
                 self.sequence_lengths: X_train_batch_lens,
                 self.dropout_pl: self.dropout_keep_prod})
            if local_step % self.display_step == 0:
                # Evaluate with dropout disabled (keep probability 1.0).
                val_loss = self.sess.run(
                    self.loss,
                    {self.word_ids: X_val_batch,
                     self.labels: Y_val_batch,
                     self.sequence_lengths: X_val_batch_lens,
                     self.dropout_pl: 1.0})
                print("Epoch %d/%d | Batch %d/%d | train_loss: %.3f | val_loss: %.3f"
                      % (epoch, self.epoch_num, local_step,
                         len(train_data) // self.batch_size, loss, val_loss))
                self.file_writer.add_summary(summary, step_num)
            if self.n_step_to_save and step_num % self.n_step_to_save == 0 \
                    and step_num != 0:
                # Checkpoint every n_step_to_save steps.
                saver.save(self.sess, self.model_path, global_step=step_num)
                print("Model Saved... at time step " + str(step_num))
    saver.save(self.sess, self.model_path)
    print("Model Saved.")
    self.sess.close()
def oracle(args, model, ptb, data, type):
    logger.info('Running oracle for {}'.format(type))
    model.eval()
    assert isinstance(ptb, PerturbationSynonym)
    cnt_cor = 0
    word_embeddings = model.word_embeddings.weight
    vocab = model.vocab
    for t, example in enumerate(data):
        embeddings, mask, tokens, label_ids = model.get_input([example])
        candidates = example['candidates']
        if tokens[0][0] == '[CLS]':
            candidates = [[]] + candidates + [[]]
        embeddings_all = []

        # Enumerate all substitutions within the budget by depth-first search,
        # collecting the embeddings of every perturbed sentence.
        def dfs(tokens, embeddings, budget, index):
            if index == len(tokens):
                embeddings_all.append(embeddings.cpu())
                return
            dfs(tokens, embeddings, budget, index + 1)
            if budget > 0 and tokens[index] != '[UNK]' and len(candidates[index]) > 0 \
                    and tokens[index] == candidates[index][0]:
                for w in candidates[index][1:]:
                    if w in vocab:
                        _embeddings = torch.cat([
                            embeddings[:index],
                            word_embeddings[vocab[w]].unsqueeze(0),
                            embeddings[index + 1:]
                        ], dim=0)
                        dfs(tokens, _embeddings, budget - 1, index + 1)

        dfs(tokens[0], embeddings[0], ptb.budget, 0)
        # The example is robust only if every perturbation is classified correctly.
        cor = True
        for embeddings in get_batches(embeddings_all, args.oracle_batch_size):
            embeddings_tensor = torch.cat(embeddings).cuda().reshape(
                len(embeddings), *embeddings[0].shape)
            logits = model.model_from_embeddings(embeddings_tensor, mask)
            for pred in list(torch.argmax(logits, dim=1)):
                if pred != example['label']:
                    cor = False
            if not cor:
                break
        cnt_cor += cor
        if (t + 1) % args.log_interval == 0:
            logger.info('{} {}/{}: oracle robust acc {:.3f}'.format(
                type, t + 1, len(data), cnt_cor * 1. / (t + 1)))
    logger.info('{}: oracle robust acc {:.3f}'.format(type, cnt_cor * 1. / (t + 1)))
infer_model = InferenceModel(vocab_size, embedding_size, num_units, num_layers,
                             max_target_sequence_length, infer_batch_size,
                             beam_size, segment_to_int, infer_mode)
checkpoints_path = "model2/checkpoints"
# train_sess.run(initializer)
infer_batch = get_infer_batches(source_inputs, infer_batch_size,
                                vocab_to_int['<PAD>'])
print(infer_batch)
for i in range(epochs):
    for batch_i, batch in enumerate(
            get_batches(source_inputs, target_inputs, target_outputs, batch_size,
                        vocab_to_int['<PAD>'], vocab_to_int['<PAD>'])):
        if batch_i <= 30000:
            current_loss = train_model.train(train_sess, batch)
            print('Epoch %d Batch %d/%d - Training Loss: %f'
                  % (i + 1, batch_i + 1,
                     (len(source_inputs) - 1) // batch_size + 1, current_loss))
        if (batch_i + 1) % infer_step == 0:
            # Periodically checkpoint, then run inference with the latest weights.
            checkpoint_path = train_model.saver.save(train_sess, checkpoints_path,
                                                     global_step=(i * 100 + batch_i))
            infer_model.saver.restore(infer_sess, checkpoint_path)
            current_predict = infer_model.infer(infer_sess, infer_batch)
def main(args):
    # parse args
    args = parse_args(args)

    # prepare data
    if args['prep_data']:
        print('\n>> Preparing Data\n')
        prepare_data(args)
        sys.exit()

    # otherwise, read data and metadata from pickled files
    with open(P_DATA_DIR + 'metadata.pkl', 'rb') as f:
        metadata = pkl.load(f)
    with open(P_DATA_DIR + 'data.pkl', 'rb') as f:
        data_ = pkl.load(f)

    # read content of data and metadata
    candidates = data_['candidates']
    candid2idx, idx2candid = metadata['candid2idx'], metadata['idx2candid']

    # get train/test/val data
    train, test, val = data_['train'], data_['test'], data_['val']

    # gather more information from metadata
    sentence_size = metadata['sentence_size']
    w2idx = metadata['w2idx']
    idx2w = metadata['idx2w']  # is a list
    memory_size = metadata['memory_size']
    vocab_size = metadata['vocab_size']
    n_cand = metadata['n_cand']
    candidate_sentence_size = metadata['candidate_sentence_size']
    # embeddings = metadata['embeddings']

    # vectorize candidates
    candidates_vec = data_utils.vectorize_candidates(candidates, w2idx,
                                                     candidate_sentence_size)

    print('---- memory config ----')
    print('embedding size:', EMBEDDING_SIZE)
    print('batch_size:', BATCH_SIZE)
    print('memory_size:', memory_size)
    print('vocab_size:', vocab_size)
    print('candidate_size:', n_cand)
    print('candidate_sentence_size:', candidate_sentence_size)
    print('hops:', HOPS)
    print('---- end ----')

    # create model
    model = memn2n.MemN2NDialog(batch_size=BATCH_SIZE,
                                vocab_size=vocab_size,
                                candidates_size=n_cand,
                                sentence_size=sentence_size,
                                embedding_size=EMBEDDING_SIZE,
                                candidates_vec=candidates_vec,
                                hops=HOPS)

    # gather data in batches
    train, val, test, batches = data_utils.get_batches(train, val, test, metadata,
                                                       batch_size=BATCH_SIZE)

    if args['train']:
        # training starts here
        epochs = args['epochs']
        eval_interval = args['eval_interval']

        # restore from checkpoint
        _check_restore_parameters(model.get_sess(), model.saver, CKPT_DIR)

        # training and evaluation loop
        print('\n>> Training started!\n')
        # write log to file
        log_handle = open(dir_path + '/../../logs/' + args['log_file'], 'w')
        cost_total = 0.
        best_cost = 100
        lowest_val_acc = 0.8  # accuracy threshold that must be beaten before saving
        # time.clock() was removed in Python 3.8; perf_counter() is the replacement
        total_begin = time.perf_counter()
        begin = time.perf_counter()
        for i in range(epochs + 1):
            for start, end in batches:
                s = train['s'][start:end]
                q = train['q'][start:end]
                a = train['a'][start:end]
                if config.MULTILABEL >= 1:
                    # convert the answers to multi-hot vectors
                    one_hot = np.zeros((end - start, n_cand))
                    for aa in range(end - start):
                        for index in a[aa]:
                            one_hot[aa][index] = 1
                    a = one_hot
                cost_total += model.batch_fit(s, q, a)
            if config.MULTILABEL >= 1:
                if i % 1 == 0 and i:
                    print('stage...', i, cost_total)
                    if cost_total < best_cost:
                        print('saving model...', i, '++',
                              str(best_cost) + '-->' + str(cost_total))
                        best_cost = cost_total
                        model.saver.save(model.get_sess(),
                                         CKPT_DIR + '/memn2n_model.ckpt',
                                         global_step=i)
            else:
                if i % 1 == 0 and i:
                    print('stage...', i)
                if i % eval_interval == 0 and i:
                    train_preds = batch_predict(model, train['s'], train['q'],
                                                len(train['s']),
                                                batch_size=BATCH_SIZE)
                    # print the mispredicted training examples
                    for error in range(len(train['q'])):
                        if train_preds[error] != train['a'][error]:
                            print_out = recover(error, train['s'], train['q'],
                                                train_preds[error],
                                                train['a'][error],
                                                idx2w, idx2candid)
                            print(print_out)
                    val_preds = batch_predict(model, val['s'], val['q'],
                                              len(val['s']),
                                              batch_size=BATCH_SIZE)
                    train_acc = metrics.accuracy_score(np.array(train_preds),
                                                       train['a'])
                    val_acc = metrics.accuracy_score(val_preds, val['a'])
                    end = time.perf_counter()
                    print('Epoch[{}] : <ACCURACY>\n\ttraining : {}\n\t'
                          'validation : {}\n\tcurrent_best_accuracy: {}'.format(
                              i, train_acc, val_acc, lowest_val_acc))
                    print('time:{}'.format(end - begin))
                    # log_handle.write('{} {} {} {}\n'.format(i, train_acc, val_acc,
                    #     cost_total / (eval_interval * len(batches))))
                    cost_total = 0.  # reset the accumulated cost
                    begin = end
                    # save the best model to disk
                    if train_acc > lowest_val_acc:
                        print('saving model...', train_acc, lowest_val_acc)
                        lowest_val_acc = train_acc
                        model.saver.save(model.get_sess(),
                                         CKPT_DIR + '/memn2n_model.ckpt',
                                         global_step=i)
        total_end = time.perf_counter()
        print('Total time: {} minutes.'.format((total_end - total_begin) / 60))
        log_handle.close()
    else:
        # inference: restore a checkpoint and serve queries interactively
        # ckpt = tf.train.get_checkpoint_state(CKPT_DIR)
        # if ckpt and ckpt.model_checkpoint_path:
        #     print('\n>> restoring checkpoint from', ckpt.model_checkpoint_path)
        #     model.saver.restore(model.get_sess(), ckpt.model_checkpoint_path)
        #
        # # create an interactive session instance
        # isess = InteractiveSession(model, idx2candid, w2idx, n_cand, memory_size)
        #
        # if args['infer']:
        #     query = ''
        #     while query != 'exit':
        #         query = input('>> ')
        #         print('>> ' + isess.reply(query))
        # elif args['ui']:
        #     return isess
        pass
def main(_):
    with tf.Session() as sess:
        cells = get_lstm_cells(num_hidden, keep_prob)
        init_states = cells.zero_state(batch_size, tf.float32)
        outputs, final_states = rnn(rnn_inputs, cells, num_hidden[-1],
                                    num_steps, num_class, init_states)
        predicts = tf.argmax(outputs, -1, name='predict_op')
        softmax_out = tf.nn.softmax(outputs, name='softmax_op')
        top_k = tf.nn.top_k(softmax_out, k=k, sorted=False, name='top_k_op')
        with tf.variable_scope('train'):
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                           logits=outputs),
                name='loss_op')
            global_step = tf.Variable(0, name='global_step', trainable=False,
                                      collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                                                   tf.GraphKeys.GLOBAL_STEP])
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
            train_op = optimizer.minimize(loss, global_step=global_step,
                                          name='train_op')
            arg_labels = tf.argmax(labels, -1)
            acc = tf.reduce_mean(tf.cast(tf.equal(predicts, arg_labels), tf.float32),
                                 name='acc_op')
        sess.run(tf.global_variables_initializer())
        # Fetch the ops back from the graph by name.
        global_step_tensor = sess.graph.get_tensor_by_name('train/global_step:0')
        train_op = sess.graph.get_operation_by_name('train/train_op')
        acc_op = sess.graph.get_tensor_by_name('train/acc_op:0')
        loss_tensor = sess.graph.get_tensor_by_name('train/loss_op:0')

        print('Start training ...')
        loss_history = []
        acc_history = []
        batch_num = 30
        a = datetime.now().replace(microsecond=0)
        for i in range(epochs):
            total_loss = 0
            total_acc = 0
            count = 0
            # Carry the LSTM state across batches within an epoch.
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(train_encode, batch_num, num_steps):
                _, loss_value, acc_value, current_states = sess.run(
                    [train_op, loss_tensor, acc_op, final_states],
                    feed_dict={X: x, Y: y, init_states: current_states,
                               keep_prob: 1})
                total_loss += loss_value
                total_acc += acc_value
                count += 1
            total_loss /= count
            total_acc /= count

            valid_acc = 0
            count = 0
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(valid_encode, batch_num, num_steps):
                acc_value, current_states = sess.run(
                    [acc_op, final_states],
                    feed_dict={X: x, Y: y, init_states: current_states})
                valid_acc += acc_value
                count += 1
            valid_acc /= count
            print("Epochs: {}, loss: {:.4f}, acc: {:.4f}, val_acc: {:.4f}".format(
                i + 1, total_loss, total_acc, valid_acc))
            loss_history.append(total_loss)
            acc_history.append([total_acc, valid_acc])

        plt.plot(loss_history)
        plt.xlabel("epochs")
        plt.ylabel("BPC")
        plt.title("Training curve")
        plt.savefig("Training curve.png", dpi=100)
        plt.gcf().clear()

        acc_history = np.array(acc_history).T
        err_history = 1 - acc_history
        plt.plot(err_history[0], label='training error')
        plt.plot(err_history[1], label='validation error')
        plt.xlabel("epochs")
        plt.ylabel("Error rate")
        plt.title("Training error")
        plt.legend()
        plt.savefig("Training error.png", dpi=100)

        # predict 500 characters from a seed
        seed = 'Asuka'
        seed_encode = np.array([vocab_to_int[c] for c in list(seed)])
        seed_encode = np.concatenate((seed_encode, np.zeros(num_steps - 5)))
        current_states = sess.run(init_states, feed_dict={batch_size: 1})
        index = 4  # position of the last seed character
        for i in range(500):
            if index == num_steps - 1:
                # The window is full: sample, then start a new window that
                # begins with the sampled character.
                candidates, current_states = sess.run(
                    [top_k, final_states],
                    feed_dict={X: seed_encode[None, :],
                               init_states: current_states})
                p = candidates.values[0, index]
                p /= p.sum()  # renormalize the top-k probabilities
                rand_idx = np.random.choice(k, p=p)
                seed_encode = np.append(candidates.indices[0, index, rand_idx],
                                        np.zeros(num_steps - 1))
            else:
                candidates = sess.run(top_k,
                                      feed_dict={X: seed_encode[None, :],
                                                 init_states: current_states})
                p = candidates.values[0, index]
                p /= p.sum()
                rand_idx = np.random.choice(k, p=p)
                seed_encode[index + 1] = candidates.indices[0, index, rand_idx]
            seed += int_to_vocab[candidates.indices[0, index, rand_idx]]
            index = (index + 1) % num_steps
        print(seed)
        b = datetime.now().replace(microsecond=0)
        print("Time cost:", b - a)
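# The char-RNN loop above carries LSTM state across batches, which implies a
# get_batches(encoded_text, batch_num, num_steps) variant that yields
# contiguous (batch_num, num_steps) windows with targets shifted by one step.
# A sketch of that convention (an assumption; any one-hot encoding of y that
# the cross-entropy loss expects is left to the feed/model side):
import numpy as np

def get_batches(encoded, batch_num, num_steps):
    # Trim so the text splits evenly into batch_num parallel streams.
    chars_per_stream = len(encoded) // batch_num
    streams = np.array(encoded[:batch_num * chars_per_stream]).reshape(batch_num, -1)
    for start in range(0, streams.shape[1] - 1, num_steps):
        x = streams[:, start:start + num_steps]
        y = streams[:, start + 1:start + num_steps + 1]
        if x.shape[1] == num_steps and y.shape[1] == num_steps:
            yield x, y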
torch.cuda.manual_seed_all(args.seed)

dummy_embeddings = torch.zeros(1, args.max_sent_length, args.embedding_size,
                               device=args.device)
dummy_labels = torch.zeros(1, dtype=torch.long, device=args.device)
if args.model == 'transformer':
    dummy_mask = torch.zeros(1, 1, 1, args.max_sent_length, device=args.device)
    model = Transformer(args, data_train)
elif args.model == 'lstm':
    dummy_mask = torch.zeros(1, args.max_sent_length, device=args.device)
    model = LSTM(args, data_train)

dev_batches = get_batches(data_dev, args.batch_size)
test_batches = get_batches(data_test, args.batch_size)

ptb = PerturbationSynonym(budget=args.budget)
dummy_embeddings = BoundedTensor(dummy_embeddings, ptb)
model_ori = model.model_from_embeddings
bound_opts = {'relu': args.bound_opts_relu, 'exp': 'no-max-input'}
if isinstance(model_ori, BoundedModule):
    model_bound = model_ori
else:
    model_bound = BoundedModule(model_ori, (dummy_embeddings, dummy_mask),
                                bound_opts=bound_opts, device=args.device)
model.model_from_embeddings = model_bound
if args.loss_fusion:
    bound_opts['loss_fusion'] = True
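# With the wrapper in place, output bounds under the synonym perturbation
# attached to the BoundedTensor can be queried through auto_LiRPA's
# compute_bounds(); a usage sketch, not part of the original snippet (the
# method string is one of several options, e.g. 'IBP' or 'IBP+backward',
# depending on the verification algorithm wanted):
#
#   lb, ub = model_bound.compute_bounds(x=(dummy_embeddings, dummy_mask),
#                                       method='IBP+backward')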
def train_model(model):
    train_num_batches = int(len(model.X_train) / model.config.batch_size)
    # Store the loss of each batch separately.
    train_loss_history = np.zeros((model.config.max_epochs, train_num_batches))
    # Can be anything, typically greater than the train batch size.
    model.config.val_batchsize = model.config.batch_size
    val_num_batches = int(len(model.X_val) / model.config.val_batchsize)
    val_loss_history = np.zeros((model.config.max_epochs, val_num_batches))
    # Store the accuracy of each class separately.
    train_acc_history = np.zeros((model.config.max_epochs, model.config.label_size))
    val_acc_history = np.zeros_like(train_acc_history)
    best_val_acc = 0
    best_epoch = 0
    if not os.path.exists("./weights"):
        os.makedirs("./weights")
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(model.config.max_epochs):
            print('Epoch: ', epoch)
            X_train, seq_len_train, y_train = get_batches(
                model.X_train, model.y_train, model.config.batch_size)
            epoch_train_loss, epoch_train_acc = run_epoch(
                sess, model, zip(X_train, seq_len_train, y_train))
            print()
            print("Train Loss: {:.4f} \t Train Accuracy: {} \t Mean AUC: {:.5f}".format(
                np.mean(epoch_train_loss), epoch_train_acc,
                np.mean(epoch_train_acc)))
            X_val, seq_len_val, y_val = get_batches(model.X_val, model.y_val,
                                                    model.config.val_batchsize)
            epoch_val_loss, epoch_val_acc = run_epoch(
                sess, model, zip(X_val, seq_len_val, y_val), val=True)
            print("Val Loss: {:.4f} \t Val Accuracy: {} \t Mean AUC: {:.5f}".format(
                np.mean(epoch_val_loss), epoch_val_acc, np.mean(epoch_val_acc)))
            print()
            train_acc_history[epoch, :] = epoch_train_acc
            val_acc_history[epoch, :] = epoch_val_acc
            train_loss_history[epoch, :] = np.array(epoch_train_loss)
            val_loss_history[epoch, :] = np.array(epoch_val_loss)
            val_loss = np.mean(epoch_val_loss)
            if np.mean(epoch_val_acc) > best_val_acc:
                best_val_loss = val_loss
                best_epoch = epoch
                best_val_acc = np.mean(epoch_val_acc)
                saver = tf.train.Saver()
                saver.save(sess, './weights/%s' % model.config.model_name)
            if epoch - best_epoch > model.config.early_stopping:
                # Stop when there has been no improvement for early_stopping epochs.
                print('Stopping due to early stopping')
                break
            if epoch - best_epoch > model.config.anneal_threshold:
                # Anneal the learning rate when the validation loss stops improving.
                model.config.lr *= model.config.annealing_factor
                print("Annealing learning rate to {}".format(model.config.lr))
    print('Best Validation Accuracy is {}'.format(best_val_acc))
def test_model(test=False):
    config = Config()
    model = Model(config, 'train.csv', debug=False)
    start_time = time.time()
    train_model(model)  # save the weights and model
    print()
    print("#" * 20)
    print('Completed Training')
    print('Training Time: {} minutes'.format((time.time() - start_time) / 60))
    if not test:
        return
    test_data = pd.read_csv('test.csv')
    X_test = test_data['comment_text'].values
    test_idx = test_data.iloc[:, 0].values
    print("Generating test results ...")
    model.config.batch_size = 59
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('./weights/%s.meta'
                                           % model.config.model_name)
        saver.restore(sess, './weights/%s' % model.config.model_name)
        X_test, test_seq_length = get_batches(X=X_test, y=None,
                                              batch_size=model.config.batch_size,
                                              shuffle=False)
        e_pred = []
        for X, seq in zip(X_test, test_seq_length):
            # Run the test set in batches.
            feed = model.build_feeddict(X, seq, val=True)
            p = sess.run(model.pred, feed_dict=feed)
            e_pred.append(p)
        prediction = np.concatenate(e_pred, axis=0)
    assert len(test_idx) == len(prediction)
    # Write the output submission to a file.
    submit_df = pd.DataFrame({
        'id': test_idx,
        'toxic': prediction[:, 0],
        'severe_toxic': prediction[:, 1],
        'obscene': prediction[:, 2],
        'threat': prediction[:, 3],
        'insult': prediction[:, 4],
        'identity_hate': prediction[:, 5]
    })
    submit_df.to_csv('submission.csv', index=False,
                     columns=['id', 'toxic', 'severe_toxic', 'obscene',
                              'threat', 'insult', 'identity_hate'])
def train():
    if not tf.gfile.Exists(config.PREPROCESS_DATA):
        print('Preprocessed file not found; regenerating it and saving to: {}'.format(
            config.PREPROCESS_DATA))
        data_utils.preprocess_and_save_data(config.source_path, config.target_path)
    (source_int_text, target_int_text), \
        (source_vocab_to_int, target_vocab_to_int), _ = data_utils.load_preprocess()
    print("Training data loaded successfully")

    train_graph = tf.Graph()
    with train_graph.as_default():
        model = Seq2Seq_Model(
            num_units=config.num_units,
            batch_size=config.batch_size,
            source_vocab_size=len(source_vocab_to_int),
            target_vocab_size=len(target_vocab_to_int),
            encoding_embedding_size=config.encoding_embedding_size,
            decoding_embedding_size=config.decoding_embedding_size,
            target_vocab_to_int=target_vocab_to_int,
            mode='train')
        model.build_model()

    # Split the data into training and validation sets
    batch_size = config.batch_size
    train_source = source_int_text[batch_size:]
    train_target = target_int_text[batch_size:]
    valid_source = source_int_text[:batch_size]
    valid_target = target_int_text[:batch_size]
    (valid_sources_batch, valid_targets_batch,
     valid_sources_lengths, valid_targets_lengths) = next(
        data_utils.get_batches(valid_source, valid_target, batch_size,
                               source_vocab_to_int['<PAD>'],
                               target_vocab_to_int['<PAD>']))

    with tf.Session(graph=train_graph,
                    config=tf.ConfigProto(device_count={'GPU': 0})) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(1, config.num_epochs + 1):
            for batch_i, (source_batch, target_batch,
                          sources_lengths, targets_lengths) in \
                    enumerate(data_utils.get_batches(
                        train_source, train_target, config.batch_size,
                        source_vocab_to_int['<PAD>'],
                        target_vocab_to_int['<PAD>'])):
                train_loss = model.train(sess, source_batch, target_batch,
                                         sources_lengths, targets_lengths,
                                         config.learning_rate)
                if batch_i % config.display_step == 0 and batch_i > 0:
                    valid_loss = model.eval(sess, valid_sources_batch,
                                            valid_targets_batch,
                                            valid_sources_lengths,
                                            valid_targets_lengths)
                    print('Epoch {:>3} Batch {:>4}/{} - Loss: {:>6.4f}, '
                          'Valid Loss: {:6.4f}'.format(
                              epoch_i, batch_i,
                              len(source_int_text) // batch_size,
                              train_loss, valid_loss))
        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, config.save_path)
        print('Model Trained and Saved')
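# The seq2seq snippets above expect get_batches(sources, targets, batch_size,
# source_pad_int, target_pad_int) to yield padded batches together with the
# true sequence lengths. A sketch of that convention, modeled on the call
# sites above (an assumption, not any project's actual helper):
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad every sentence in the batch to the length of the longest one."""
    max_sentence = max(len(sentence) for sentence in sentence_batch)
    return [sentence + [pad_int] * (max_sentence - len(sentence))
            for sentence in sentence_batch]

def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    for batch_i in range(len(sources) // batch_size):
        start_i = batch_i * batch_size
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]
        pad_sources_batch = pad_sentence_batch(sources_batch, source_pad_int)
        pad_targets_batch = pad_sentence_batch(targets_batch, target_pad_int)
        # Report the unpadded lengths so the model can mask padding positions.
        source_lengths = [len(source) for source in sources_batch]
        target_lengths = [len(target) for target in targets_batch]
        yield pad_sources_batch, pad_targets_batch, source_lengths, target_lengths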
def verify(self, example):
    start_time = time.time()

    embeddings, tokens = self.target.get_embeddings([example])
    length = embeddings.shape[1]
    tokens = tokens[0]

    self.logger.write("tokens:", " ".join(tokens))
    self.logger.write("length:", length)
    self.logger.write("label:", example["label"])

    self.std = self.target.step([example], infer_grad=True)[-1]

    result = {
        "tokens": tokens,
        "label": float(example["label"]),
        "bounds": []
    }
    cnt = 0
    sum_eps, min_eps = 0, 1e30

    assert self.perturbed_words == 1
    # [CLS] and [SEP] cannot be perturbed
    for i in range(1, length - 1):
        # skip positions that are part of a subword split
        if tokens[i][0] == "#" or tokens[i + 1][0] == "#":
            continue
        # Substitute every candidate word at position i and find the smallest
        # embedding-space distance that flips the prediction.
        candidates = []
        for w in self.words:
            _tokens = copy.deepcopy(tokens)
            _tokens[i] = w
            sent = ""
            for _w in _tokens[1:-1]:
                if _w[0] == "#":
                    sent += _w[2:] + " "
                else:
                    sent += _w + " "
            candidates.append({
                "sent_a": sent.split(),
                "label": example["label"]
            })
        epsilon = 1e10
        epsilon_max = 0
        for batch in get_batches(candidates, self.batch_size):
            r = self.target.step(batch)[-1]
            dist = torch.norm(r["embedding_output"][:, i]
                              - embeddings[0][i].unsqueeze(0),
                              p=self.p, dim=-1)
            for j in range(len(batch)):
                if r["pred_labels"][j] != example["label"]:
                    epsilon = min(epsilon, float(dist[j]))
                epsilon_max = max(epsilon_max, float(dist[j]))
        # If no substitution flipped the label, fall back to the largest
        # distance actually tested.
        epsilon = min(epsilon, epsilon_max)
        epsilon_normalized = epsilon / torch.norm(embeddings[0, i], p=self.p)
        self.logger.write("Position %d: %s %.5f %.5f" % (
            i, tokens[i], epsilon, epsilon_normalized))
        result["bounds"].append({
            "position": i,
            "eps": float(epsilon),
            "eps_normalized": float(epsilon_normalized)
        })
        cnt += 1
        sum_eps += epsilon
        min_eps = min(min_eps, epsilon)

    result["time"] = time.time() - start_time
    self.logger.write("Time elapsed", result["time"])
    return result, sum_eps / cnt, min_eps
args = update_arguments(args)
set_seeds(args.seed)
data_train, data_valid, data_test, _, _ = load_data(args)
set_seeds(args.seed)

import tensorflow as tf
config = tf.ConfigProto(device_count={'GPU': 0})
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
with sess.as_default():
    target = Transformer(args, data_train)

    random.shuffle(data_valid)
    random.shuffle(data_test)
    valid_batches = get_batches(data_valid, args.batch_size)
    test_batches = get_batches(data_test, args.batch_size)
    print("Dataset sizes: %d/%d/%d" % (len(data_train), len(data_valid),
                                       len(data_test)))

    summary_names = ["loss", "accuracy"]
    summary_num_pre = 2
    logger = Logger(sess, args, summary_names, 1)
    print("\n")

    if args.train:
        # logger.epoch is a TF variable; .eval() uses the default session.
        while logger.epoch.eval() <= args.num_epoches:
            random.shuffle(data_train)
            train_batches = get_batches(data_train, args.batch_size)
def main(unused_argv):
    if len(unused_argv) != 1:
        # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)
    # choose what level of logging you want
    tf.logging.set_verbosity(tf.logging.INFO)
    if FLAGS.mode == 'rl_train':
        tf.logging.info('Starting model in %s mode...',
                        FLAGS.mode + '_' + FLAGS.reward_type)
    else:
        tf.logging.info('Starting model in %s mode...', FLAGS.mode)

    # If in decode mode, set batch_size = beam_size.
    # Reason: in decode mode, we decode one example at a time. On each step,
    # we have beam_size-many hypotheses in the beam, so we need to make a
    # batch of these hypotheses.
    if FLAGS.mode == 'beam_search_decode':
        FLAGS.batch_size = FLAGS.beam_size

    train_data, valid_data, test_data = prepare_dataset(FLAGS.data_path)
    print('TrainData Size:', len(train_data))
    print('ValidData Size:', len(valid_data))
    print('TestData Size:', len(test_data))

    print("Building vocabulary ..... ")
    word2id, id2word, _, max_ending_len, min_ending_len = creat_vocab(
        train_data, FLAGS.word_vocab_size)
    print("Finished building vocabulary!")
    word_vocab_size = len(word2id.keys())

    # Make a namedtuple hps, containing the values of the hyperparameters
    # that the model needs.
    hparam_list = [
        'mode', 'loss_rate_of_sem', 'loss_rate_of_mle', 'word_vocab_size',
        'use_mixed_loss', 'lr', 'train_keep_prob', 'rl_loss_scale_factor',
        'rand_unif_init_mag', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'coverage', 'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_dict['max_dec_steps'] = max_ending_len
    hps_dict['min_ending_len'] = min_ending_len
    if FLAGS.word_vocab_size is None:
        hps_dict['word_vocab_size'] = word_vocab_size
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # create minibatches of data
    train_batches = get_batches(len(train_data), FLAGS.batch_size)
    valid_batches = get_batches(len(valid_data), FLAGS.batch_size)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'seq2seq_train':
        train_dir = os.path.join(FLAGS.exp_name, "train_seq2seq")
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                m_train = SCST_RLModel(is_training=True, hps=hps)
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                m_valid = SCST_RLModel(is_training=False, hps=hps)
            if FLAGS.convert_to_coverage_model:
                assert FLAGS.coverage, ("To convert your non-coverage model to a "
                                        "coverage model, run with "
                                        "convert_to_coverage_model=True and "
                                        "coverage=True")
                convert_to_coverage_model()
            sv = tf.train.Supervisor(logdir=train_dir,
                                     save_model_secs=FLAGS.save_model_secs)
            sess_context_manager = sv.managed_session(config=util.get_config())
            tf.logging.info("Created session.")
            try:
                # this is an infinite loop until interrupted
                run_seq2seq_training(m_train, m_valid, train_data, train_batches,
                                     valid_data, valid_batches, word2id,
                                     max_ending_len, sv, sess_context_manager)
            except KeyboardInterrupt:
                tf.logging.info("Caught keyboard interrupt on worker. "
                                "Stopping supervisor...")
                sv.stop()
    elif hps.mode == 'rl_train':
        train_dir = os.path.join(
            FLAGS.exp_name,
            "train_rl" + '_' + FLAGS.reward_type + 'mu_' +
            str(FLAGS.rl_loss_scale_factor))
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                m_train = SCST_RLModel(is_training=True, hps=hps)
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                m_valid = SCST_RLModel(is_training=False, hps=hps)

            # define a load_pretrain function for restoring the best seq2seq
            # model from eval_dir
            ckpt_dir = 'eval_seq2seq'
            latest_filename = "checkpoint_best" if ckpt_dir == "eval_seq2seq" else None
            ckpt_dir = os.path.join(FLAGS.exp_name, ckpt_dir)
            ckpt_state = tf.train.get_checkpoint_state(
                ckpt_dir, latest_filename=latest_filename)
            print("Loading pre-trained seq2seq model from %s"
                  % ckpt_state.model_checkpoint_path)
            saver = tf.train.Saver()

            def load_pretrain(sess):
                return saver.restore(sess, ckpt_state.model_checkpoint_path)

            sv = tf.train.Supervisor(logdir=train_dir,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_model_secs,
                                     init_fn=load_pretrain)
            sess_context_manager = sv.managed_session(config=util.get_config())
            tf.logging.info("Created session.")
            try:
                # this is an infinite loop until interrupted
                run_rl_training(m_train, m_valid, train_data, train_batches,
                                valid_data, valid_batches, word2id,
                                max_ending_len, sv, sess_context_manager)
            except KeyboardInterrupt:
                tf.logging.info("Caught keyboard interrupt on worker. "
                                "Stopping supervisor...")
                sv.stop()
    elif hps.mode == 'beam_search_decode':
        # These will be the hyperparameters for the decoder model. The model is
        # configured with max_dec_steps=1 because we only ever run one step of
        # the decoder at a time (to do beam search). Note that the batcher is
        # initialized with max_dec_steps equal to e.g. 100 because the batches
        # need to contain the full summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        test_examples_list = prepare_data_for_beam_seach_decode(
            test_data, FLAGS.batch_size, word2id, max_plot_len,
            max_ending_len, FLAGS.pointer_gen)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                model_test = SCST_RLModel(is_training=False, hps=decode_model_hps)
            run_beam_search_decode(model_test, test_examples_list, id2word,
                                   data='test_data',
                                   ckpt_dir=FLAGS.decode_ckpt_dir)
    else:
        raise ValueError("The 'mode' flag must be one of "
                         "seq2seq_train/rl_train/beam_search_decode")
sess.run(tf.global_variables_initializer())
train_current_step = 0
dev_current_step = 0
train_writer = tf.summary.FileWriter(args.summary_dir + 'train', graph=sess.graph)
dev_writer = tf.summary.FileWriter(args.summary_dir + 'dev')
train_batch = Batch()
dev_batch = Batch()
for e in range(args.numEpochs):
    dev_loss_sum = 0
    print("----- Epoch {}/{} -----".format(e + 1, args.numEpochs))
    for batch_i, (train_source_batch, train_target_batch,
                  train_source_length, train_target_length) in enumerate(
            get_batches(source_data_train, target_data_train,
                        args.batch_size, source_word_to_idx['<PAD>'])):
        train_batch.encoder_inputs = train_source_batch
        train_batch.decoder_targets = train_target_batch
        train_batch.encoder_inputs_length = train_source_length
        train_batch.decoder_targets_length = train_target_length
        # tqdm is a fast, extensible Python progress bar: wrapping any iterable
        # with tqdm(iterator) adds a progress display to long loops.
        # for nextBatch in tqdm(batches, desc="Training"):
        train_loss, train_summary = model.train(sess, train_batch)
        train_current_step += 1
        # Save a checkpoint every steps_per_checkpoint steps
        if train_current_step % args.steps_per_checkpoint == 0: