def get_result(loaders, model, phase, loss_scaling=1000.0, lambda_BN=0.0,
               gamma=0.0, block_size=16):
    time_ep = time.time()
    res = utils.run_epoch(loaders[phase], model, criterion,
                          optimizer=optimizer, phase=phase,
                          loss_scaling=loss_scaling, lambda_BN=lambda_BN,
                          gamma=gamma, block_size=block_size)
    time_pass = time.time() - time_ep
    res['time_pass'] = time_pass
    return res
def get_result(loaders, model, phase):
    time_ep = time.time()
    res = utils.run_epoch(loaders[phase], model, criterion,
                          optimizer=optimizer, phase=phase)
    time_pass = time.time() - time_ep
    res["time_pass"] = time_pass
    return res
def get_result(loaders, model, phase, loss_scaling=1000.0, lambda_BN=0.0,
               lambda_CG=0.0, target_cg_threshold=0.0):
    time_ep = time.time()
    res = utils.run_epoch(loaders[phase], model, criterion,
                          optimizer=optimizer, phase=phase,
                          loss_scaling=loss_scaling, lambda_BN=lambda_BN,
                          lambda_CG=lambda_CG,
                          target_cg_threshold=target_cg_threshold)
    time_pass = time.time() - time_ep
    res['time_pass'] = time_pass
    return res
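# A minimal usage sketch for the get_result helpers above. It assumes `loaders`
# is a dict of DataLoaders keyed by phase and that `criterion` and `optimizer`
# are module-level globals, as the helpers imply; the epoch loop itself and the
# `num_epochs` name are hypothetical, not taken from the original code. The
# 'loss' and 'time_pass' keys follow the result dict used elsewhere in this file.
for epoch in range(num_epochs):
    train_res = get_result(loaders, model, phase='train')
    test_res = get_result(loaders, model, phase='test')
    print('epoch {}: train loss {:.4f} ({:.1f}s), test loss {:.4f} ({:.1f}s)'.format(
        epoch, train_res['loss'], train_res['time_pass'],
        test_res['loss'], test_res['time_pass']))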
def main():
    args = build_args()
    manual_seed(args['random_seed'])
    net, criterion, optimizer = get_model(args)
    dataloaders = get_dataloaders(args)

    if args['inference_mode'] is False:
        tb_writer = SummaryWriter(args['tb_run_name'])
        args['tb_writer'] = tb_writer
        run_epochs(net, optimizer, dataloaders, criterion, args)

    if args['checkpoint'] == '':
        args['checkpoint'] = args['checkpoint_name_format'].format(
            checkpoint_name='best_model', **args)
    net.load_state_dict(torch.load(args['checkpoint']))

    loss, acc, preds, gts = run_epoch(
        net, optimizer=optimizer, dataloader=dataloaders['test'],
        criterion=criterion, phase='test', device=args['device'],
        with_preds_and_gts=True)
    aucs = multiclass_roc_auc_score(preds, gts)
    print('{} Test loss: {:.3f}, Test acc: {:.2%}, Test AUC: {}'.format(
        args['job_name'], loss, acc, aucs))
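# The implementation of multiclass_roc_auc_score is not shown in this snippet;
# the sketch below is one common way such a helper is written, assuming `preds`
# holds per-class scores of shape [N, C] and `gts` holds integer class labels
# (C >= 3). It is an assumption, not the original helper.
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

def multiclass_roc_auc_score_sketch(scores, labels):
    """Macro-averaged one-vs-rest ROC AUC for a multiclass problem."""
    scores = np.asarray(scores)
    classes = np.arange(scores.shape[1])
    y_true = label_binarize(labels, classes=classes)  # [N, C] indicator targets
    return roc_auc_score(y_true, scores, average='macro')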
alignment = None
if args.alignment:
    alignment = AlignmentMeasurement(model, torch.device(f"cuda:{args.gpu_id + 1}"))
alignments = []
train_losses, val_losses, durations = [], [], []
best_val_loss = float("inf")
epochs_wo_improvement = 0
model_save_path = None
steps = 0

for epoch in range(1, args.max_epochs + 1):
    epoch_start_time = time.time()
    if alignment:
        train_loss, align_dic = utils.run_epoch(model, train_data, criterion,
                                                optimizer, vocab_size,
                                                args.chunk_length, alignment)
        alignments.append(align_dic)
    else:
        train_loss = utils.run_epoch(model, train_data, criterion, optimizer,
                                     vocab_size, args.chunk_length)
    train_losses.append(train_loss)
    steps += len(train_data)

    val_loss = utils.evaluate(model, val_data, criterion, vocab_size,
                              args.chunk_length)
    val_losses.append(val_loss)
    if scheduler:
        scheduler.step(val_loss)

    epoch_duration = time.time() - epoch_start_time
    durations.append(epoch_duration)
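    # The snippet above initialises best_val_loss, epochs_wo_improvement and
    # model_save_path but is cut off before they are used. A typical
    # continuation of the loop body would look roughly like this sketch; the
    # patience field (args.patience) and the checkpoint path are hypothetical,
    # not taken from the original code.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_wo_improvement = 0
        model_save_path = "best_model.pt"           # hypothetical path
        torch.save(model.state_dict(), model_save_path)
    else:
        epochs_wo_improvement += 1
        if epochs_wo_improvement >= args.patience:  # hypothetical field
            break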
def GAN():
    # Graph Part #
    print("Graph initialization...")
    with tf.device(FLAGS.device):
        with tf.variable_scope("model", reuse=None):
            m_train = G.BEGAN(batch_size=FLAGS.tr_batch_size,
                              is_training=True,
                              num_keys=FLAGS.num_keys,
                              input_length=FLAGS.hidden_state_size,
                              output_length=FLAGS.predict_size,
                              learning_rate=learning_rate)
        with tf.variable_scope("model", reuse=True):
            m_valid = G.BEGAN(batch_size=FLAGS.val_batch_size,
                              is_training=False,
                              num_keys=FLAGS.num_keys,
                              input_length=FLAGS.hidden_state_size,
                              output_length=FLAGS.predict_size,
                              learning_rate=learning_rate)
        with tf.variable_scope("model", reuse=True):
            m_test = G.BEGAN(batch_size=FLAGS.test_batch_size,
                             is_training=False,
                             num_keys=FLAGS.num_keys,
                             input_length=FLAGS.hidden_state_size,
                             output_length=FLAGS.predict_size,
                             learning_rate=learning_rate)
    print("Done")

    # Summary Part #
    print("Setting up summary op...")
    g_loss_ph = tf.placeholder(dtype=tf.float32)
    d_loss_ph = tf.placeholder(dtype=tf.float32)
    loss_summary_op_d = tf.summary.scalar("discriminator_loss", d_loss_ph)
    loss_summary_op_g = tf.summary.scalar("generator_loss", g_loss_ph)
    valid_summary_writer = tf.summary.FileWriter(logs_dir + '/valid/', max_queue=2)
    train_summary_writer = tf.summary.FileWriter(logs_dir + '/train/', max_queue=2)
    print("Done")

    # Model Save Part #
    print("Setting up Saver...")
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(logs_dir)
    print("Done")

    # Session Part #
    print("Setting up Data Reader...")
    validation_dataset_reader = mt.Dataset(directory=test_dir,
                                           batch_size=FLAGS.val_batch_size,
                                           is_batch_zero_pad=FLAGS.is_batch_zero_pad,
                                           hidden_state_size=FLAGS.hidden_state_size,
                                           predict_size=FLAGS.predict_size,
                                           num_keys=FLAGS.num_keys,
                                           tick_interval=tick_interval,
                                           step=FLAGS.slice_step)
    test_dataset_reader = mt.Dataset(directory=test_dir,
                                     batch_size=FLAGS.test_batch_size,
                                     is_batch_zero_pad=FLAGS.is_batch_zero_pad,
                                     hidden_state_size=FLAGS.hidden_state_size,
                                     predict_size=FLAGS.predict_size,
                                     num_keys=FLAGS.num_keys,
                                     tick_interval=tick_interval,
                                     step=FLAGS.slice_step)
    print("Done")

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if ckpt and ckpt.model_checkpoint_path:
        # model restore
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        # if the checkpoint doesn't exist, do initialization
        sess.run(tf.global_variables_initializer())

    if FLAGS.mode == "train":
        train_dataset_reader = mt.Dataset(directory=train_dir,
                                          batch_size=FLAGS.tr_batch_size,
                                          is_batch_zero_pad=FLAGS.is_batch_zero_pad,
                                          hidden_state_size=FLAGS.hidden_state_size,
                                          predict_size=FLAGS.predict_size,
                                          num_keys=FLAGS.num_keys,
                                          tick_interval=tick_interval,
                                          step=FLAGS.slice_step)
        for itr in range(MAX_EPOCH):
            feed_dict = utils.run_epoch(train_dataset_reader, FLAGS.tr_batch_size,
                                        m_train, sess)
            if itr % 100 == 0:
                if FLAGS.use_began_loss:
                    train_loss_d, train_loss_g, train_pred = sess.run(
                        [m_train.loss_d, m_train.loss_g, m_train.predict],
                        feed_dict=feed_dict)
                    train_summary_str_d, train_summary_str_g = sess.run(
                        [loss_summary_op_d, loss_summary_op_g],
                        feed_dict={g_loss_ph: train_loss_g,
                                   d_loss_ph: train_loss_d})
                    # write both discriminator and generator summaries
                    train_summary_writer.add_summary(train_summary_str_d, itr)
                    train_summary_writer.add_summary(train_summary_str_g, itr)
                    print("Step : %d TRAINING LOSS *****************" % itr)
                    print("Discriminator_loss: %g\nGenerator_loss: %g" %
                          (train_loss_d, train_loss_g))
            if itr % 1000 == 0:
                if FLAGS.use_began_loss:
                    valid_loss_d, valid_loss_g, valid_pred = utils.validation(
                        validation_dataset_reader, FLAGS.val_batch_size,
                        m_valid, FLAGS.hidden_state_size, FLAGS.predict_size,
                        sess, logs_dir, itr, tick_interval)
                    valid_summary_str_d, valid_summary_str_g = sess.run(
                        [loss_summary_op_d, loss_summary_op_g],
                        feed_dict={g_loss_ph: valid_loss_g,
                                   d_loss_ph: valid_loss_d})
                    valid_summary_writer.add_summary(valid_summary_str_d, itr)
                    valid_summary_writer.add_summary(valid_summary_str_g, itr)
                    print("Step : %d VALIDATION LOSS ***************" % itr)
                    print("Discriminator_loss: %g\nGenerator_loss: %g" %
                          (valid_loss_d, valid_loss_g))
            if itr % 1000 == 0 and itr != 0:
                utils.test_model(test_dataset_reader, FLAGS.test_batch_size, m_test,
                                 FLAGS.predict_size, sess, logs_dir, itr,
                                 tick_interval, 5)
            if itr % 1000 == 0:
                saver.save(sess, logs_dir + "/model.ckpt", itr)

    if FLAGS.mode == "test":
        utils.test_model(test_dataset_reader, FLAGS.test_batch_size, m_test,
                         FLAGS.predict_size, sess, logs_dir, 9999,
                         tick_interval, 10)
def main():
    args = Args()
    logger = get_logger('main')

    EXP_NAME = 'multihead 4 layer with mask'
    assert EXP_NAME is not None, 'What experiment is this?!'
    print(EXP_NAME)

    kaggle = KaggleData(args.train_path, args.test_path)
    kaggle.build_field(args.max_len, include_lengths=args.lengths)
    kaggle.build_dataset(split_ratio=0.9, stratified=False, strata_field='target')
    kaggle.build_vocab('question', args.max_vocab, min_freq=args.min_freq,
                       pretrained_vectors=args.embedding, cache=args.cache)
    kaggle.build_iterator(batch_sizes=[args.batch_size] * 3, device=args.device)
    kaggle.summary()

    logger.info('building model...')
    model = build_model(kaggle, args)
    # TODO: hyperparameter pos_weight is to be tuned
    criterion = nn.BCEWithLogitsLoss(
        reduction='sum',
        pos_weight=torch.tensor([args.pos_weight], device=args.device))
    optimizer, scheduler = build_optimizer_scheduler(
        'Adam', lr=0.001, parameters=model.parameters(), factor=0.5,
        patience=args.scheduler_patience, verbose=True)

    logger.info('start training...')
    early_stopping = EarlyStoppingCriterion(patience=args.early_stop_patience)
    for epoch in range(args.epoch):
        loss = run_epoch(model, kaggle.train_iter, criterion, optimizer)
        f1_score, accuracy = evaluate(model, kaggle.valid_iter,
                                      threshold=args.threshold,
                                      vocab=kaggle.vocab, verbose=False)
        scheduler.step(f1_score)
        print('loss at epoch {}: {:.5}'.format(epoch + 1, loss))
        print('f1 score / accuracy on valid: {:.4} / {:.4}'.format(f1_score, accuracy))
        if early_stopping(epoch, f1_score):
            if early_stopping.is_improved:
                logger.info('best model achieved in this epoch')
                # TODO: path name!!
                torch.save(model.state_dict(), 'best_model.pt')
        else:
            logger.info('early stopping...')
            break
        print()

    logger.info('best model is from epoch {} (f1: {:.4})'.format(
        early_stopping.best_epoch, early_stopping.best_score))
    model.load_state_dict(torch.load('best_model.pt'))

    logger.info('selecting threshold...')
    best = 0
    best_threshold = 0
    for th in np.arange(0.2, 0.6, 0.05):
        # FIXME: verbose
        f1_score, accuracy = evaluate(model, kaggle.valid_iter, threshold=float(th),
                                      vocab=kaggle.vocab, verbose=False)
        if f1_score > best:
            best = f1_score
            best_threshold = th
    print('best f1_score with threshold {}: {:.4}'.format(best_threshold, float(best)))

    pred_total, qid_total = inference(model, kaggle.test_iterator, best_threshold)
    write_to_csv(pred_total, qid_total, path='submission.csv')
                  grad_quant=grad_quantizer,
                  momentum_quant=momentum_quantizer)

# Prepare logging
columns = ['ep', 'lr', 'tr_loss', 'tr_acc', 'tr_time',
           'te_loss', 'te_acc', 'te_time']

for epoch in range(args.epochs):
    time_ep = time.time()

    lr = schedule(epoch)
    utils.adjust_learning_rate(optimizer, lr)
    train_res = utils.run_epoch(loaders['train'], model, criterion,
                                optimizer=optimizer, phase="train")
    time_pass = time.time() - time_ep
    train_res['time_pass'] = time_pass

    if epoch == 0 or epoch % args.eval_freq == args.eval_freq - 1 or epoch == args.epochs - 1:
        time_ep = time.time()
        test_res = utils.run_epoch(loaders['test'], model, criterion, phase="eval")
        time_pass = time.time() - time_ep
        test_res['time_pass'] = time_pass
    else:
        test_res = {'loss': None, 'accuracy': None, 'time_pass': None}
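    # How the `columns` list above is typically consumed: one row of values per
    # epoch, printed as a small table. The original logging code is not part of
    # this snippet, so the lines below are a sketch; they assume the third-party
    # `tabulate` package and the result-dict keys ('loss', 'accuracy',
    # 'time_pass') used elsewhere in this file.
    from tabulate import tabulate
    values = [epoch + 1, lr,
              train_res['loss'], train_res['accuracy'], train_res['time_pass'],
              test_res['loss'], test_res['accuracy'], test_res['time_pass']]
    print(tabulate([values], headers=columns, tablefmt='simple', floatfmt='9.4f'))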
def train():
    print('data_path: %s' % FLAGS.data_path)
    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, valid_nbest_data, vocab = raw_data
    train_data = chop(train_data, vocab['<eos>'])

    config = MediumConfig()
    if FLAGS.init_scale: config.init_scale = FLAGS.init_scale
    if FLAGS.learning_rate: config.learning_rate = FLAGS.learning_rate
    if FLAGS.max_grad_norm: config.max_grad_norm = FLAGS.max_grad_norm
    if FLAGS.num_layers: config.num_layers = FLAGS.num_layers
    if FLAGS.num_steps: config.num_steps = FLAGS.num_steps
    if FLAGS.hidden_size: config.hidden_size = FLAGS.hidden_size
    if FLAGS.max_epoch: config.max_epoch = FLAGS.max_epoch
    if FLAGS.max_max_epoch: config.max_max_epoch = FLAGS.max_max_epoch
    if FLAGS.keep_prob: config.keep_prob = FLAGS.keep_prob
    if FLAGS.lr_decay: config.lr_decay = FLAGS.lr_decay
    if FLAGS.batch_size: config.batch_size = FLAGS.batch_size
    if FLAGS.opt_method: config.opt_method = FLAGS.opt_method
    if FLAGS.log_dir: config.log_dir = FLAGS.log_dir
    config.h_max_log_smooth = FLAGS.h_max_log_smooth
    config.vocab_size = len(vocab)

    print('init_scale: %.2f' % config.init_scale)
    print('learning_rate: %.2f' % config.learning_rate)
    print('max_grad_norm: %.2f' % config.max_grad_norm)
    print('num_layers: %d' % config.num_layers)
    print('num_steps: %d' % config.num_steps)
    print('hidden_size: %d' % config.hidden_size)
    print('max_epoch: %d' % config.max_epoch)
    print('max_max_epoch: %d' % config.max_max_epoch)
    print('keep_prob: %.2f' % config.keep_prob)
    print('lr_decay: %.2f' % config.lr_decay)
    print('batch_size: %d' % config.batch_size)
    print('vocab_size: %d' % config.vocab_size)
    print('opt_method: %s' % config.opt_method)
    print('log_dir: %s' % config.log_dir)
    print('seed: %d' % FLAGS.seed)
    sys.stdout.flush()

    eval_config = MediumConfig()
    eval_config.init_scale = config.init_scale
    eval_config.learning_rate = config.learning_rate
    eval_config.max_grad_norm = config.max_grad_norm
    eval_config.num_layers = config.num_layers
    eval_config.num_steps = config.num_steps
    eval_config.hidden_size = config.hidden_size
    eval_config.max_epoch = config.max_epoch
    eval_config.max_max_epoch = config.max_max_epoch
    eval_config.keep_prob = config.keep_prob
    eval_config.lr_decay = config.lr_decay
    eval_config.batch_size = 200
    # eval_config.batch_size = config.batch_size
    eval_config.vocab_size = len(vocab)
    eval_config.h_max_log_smooth = config.h_max_log_smooth

    prev = 0
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        if FLAGS.model_path:
            saver = tf.train.Saver()

        loss_list = []
        train_perp_list = []
        val_perp_list = []
        val_f1_list = []
        for i in range(config.max_max_epoch):
            shuffle(train_data)
            shuffled_data = list(itertools.chain(*train_data))

            start_time = time.time()
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            if config.opt_method == "YF":
                session.run(tf.assign(m.optimizer.lr_factor, lr_decay))
            else:
                m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, loss = run_epoch(session, m, shuffled_data, m.train_op,
                                               verbose=True, epoch_id=i)
            loss_list += loss
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

            valid_perplexity, _ = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data, tf.no_op(),
                                       vocab['<eos>'])
            print("Epoch: %d Valid F1: %.2f (%d trees)" % (i + 1, valid_f1, num))
            print('It took %.2f seconds' % (time.time() - start_time))

            # print("summary added step", i * len(loss))
            summ = tf.Summary(value=[
                tf.Summary.Value(tag="eval_perp", simple_value=valid_perplexity),
            ])
            m.writer.add_summary(summ, i * len(loss))
            summ = tf.Summary(value=[
                tf.Summary.Value(tag="eval_F1", simple_value=valid_f1),
            ])
            m.writer.add_summary(summ, i * len(loss))

            train_perp_list.append([i * len(loss), train_perplexity])
            val_perp_list.append([i * len(loss), valid_perplexity])
            val_f1_list.append([i * len(loss), valid_f1])

            if prev < valid_f1:
                prev = valid_f1
                if FLAGS.model_path:
                    print('Save a model to %s' % FLAGS.model_path)
                    saver.save(session, FLAGS.model_path)
                    pickle.dump(eval_config, open(FLAGS.model_path + '.config', 'wb'))
            sys.stdout.flush()

        with open(config.log_dir + "/loss.txt", "w") as f:
            np.savetxt(f, np.array(loss_list))
        with open(config.log_dir + "/train_perp.txt", "w") as f:
            np.savetxt(f, np.array(train_perp_list))
        with open(config.log_dir + "/val_perp.txt", "w") as f:
            np.savetxt(f, np.array(val_perp_list))
        with open(config.log_dir + "/val_f1.txt", "w") as f:
            np.savetxt(f, np.array(val_f1_list))
def train():
    print('data_path: %s' % FLAGS.data_path)
    raw_data = reader.ptb_raw_data3(FLAGS.data_path)
    train_data, silver_path, valid_data, valid_nbest_data, vocab = raw_data
    train_data = chop(train_data, vocab['<eos>'])

    config = MediumConfig()
    if FLAGS.init_scale: config.init_scale = FLAGS.init_scale
    if FLAGS.learning_rate: config.learning_rate = FLAGS.learning_rate
    if FLAGS.max_grad_norm: config.max_grad_norm = FLAGS.max_grad_norm
    if FLAGS.num_layers: config.num_layers = FLAGS.num_layers
    if FLAGS.num_steps: config.num_steps = FLAGS.num_steps
    if FLAGS.hidden_size: config.hidden_size = FLAGS.hidden_size
    if FLAGS.max_epoch: config.max_epoch = FLAGS.max_epoch
    if FLAGS.max_max_epoch: config.max_max_epoch = FLAGS.max_max_epoch
    if FLAGS.keep_prob: config.keep_prob = FLAGS.keep_prob
    if FLAGS.lr_decay: config.lr_decay = FLAGS.lr_decay
    if FLAGS.batch_size: config.batch_size = FLAGS.batch_size
    config.vocab_size = len(vocab)
    if FLAGS.silver: config.silver = FLAGS.silver

    print('init_scale: %.2f' % config.init_scale)
    print('learning_rate: %.2f' % config.learning_rate)
    print('max_grad_norm: %.2f' % config.max_grad_norm)
    print('num_layers: %d' % config.num_layers)
    print('num_steps: %d' % config.num_steps)
    print('hidden_size: %d' % config.hidden_size)
    print('max_epoch: %d' % config.max_epoch)
    print('max_max_epoch: %d' % config.max_max_epoch)
    print('keep_prob: %.2f' % config.keep_prob)
    print('lr_decay: %.2f' % config.lr_decay)
    print('batch_size: %d' % config.batch_size)
    print('vocab_size: %d' % config.vocab_size)
    print('silver: %d' % config.silver)
    sys.stdout.flush()

    eval_config = MediumConfig()
    eval_config.init_scale = config.init_scale
    eval_config.learning_rate = config.learning_rate
    eval_config.max_grad_norm = config.max_grad_norm
    eval_config.num_layers = config.num_layers
    eval_config.num_steps = config.num_steps
    eval_config.hidden_size = config.hidden_size
    eval_config.max_epoch = config.max_epoch
    eval_config.max_max_epoch = config.max_max_epoch
    eval_config.keep_prob = config.keep_prob
    eval_config.lr_decay = config.lr_decay
    eval_config.batch_size = 200
    eval_config.vocab_size = len(vocab)

    prev = 0  # record F1 scores
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        if FLAGS.model_path:
            saver = tf.train.Saver()

        silver_generator = reader.file_to_word_ids3(silver_path)
        j = 0
        for i in range(config.max_max_epoch):
            shuffle(train_data)
            shuffled_data = list(itertools.chain(*train_data))

            start_time = time.time()
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, shuffled_data, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
            valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data, tf.no_op(),
                                       vocab['<eos>'])
            print("Epoch: %d Valid F1: %.2f (%d trees)" % (i + 1, valid_f1, num))

            if valid_f1 > prev:
                prev = valid_f1
                if FLAGS.model_path:
                    print('Save a model to %s' % FLAGS.model_path)
                    saver.save(session, FLAGS.model_path)
                    pickle.dump(eval_config, open(FLAGS.model_path + '.config', 'wb'))
            print('It took %.2f seconds' % (time.time() - start_time))
            sys.stdout.flush()

            start_time = time.time()
            for k in xrange(config.silver):
                try:
                    silver_data = silver_generator.next()
                except:
                    silver_generator = reader.file_to_word_ids3(silver_path)
                    silver_data = silver_generator.next()
                    j += 1
                silver_data = chop(silver_data, vocab['<eos>'])
                shuffle(silver_data)
                silver_data = list(itertools.chain(*silver_data))
                silver_perplexity = run_epoch(session, m, silver_data, m.train_op,
                                              verbose=False)
                print("Epoch: %d Silver(%d) Perplexity: %.3f" %
                      (i + 1, j, silver_perplexity))
                valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
                print("Epoch: %d Silver(V) Perplexity: %.3f" % (i + 1, valid_perplexity))
                valid_f1, num = run_epoch2(session, mvalid, valid_nbest_data,
                                           tf.no_op(), vocab['<eos>'])
                print("Epoch: %d Silver(V) F1: %.2f (%d trees)" % (i + 1, valid_f1, num))
                if valid_f1 > prev:
                    prev = valid_f1
                    if FLAGS.model_path:
                        print('Save a model to %s' % FLAGS.model_path)
                        saver.save(session, FLAGS.model_path)
                        pickle.dump(eval_config,
                                    open(FLAGS.model_path + '.config', 'wb'))
                print('It took %.2f seconds' % (time.time() - start_time))
                sys.stdout.flush()