def train(args):
    iters, vocab = get_iterator(args)
    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)
    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)
    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
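# train() relies on two helpers, evaluate_once and log_results, that are not
# shown here. A minimal sketch of what they might look like, assuming a
# pytorch-ignite Engine for the evaluator and a TensorBoard-style logger with
# an add_scalar method -- assumptions, not the project's actual implementations.
def evaluate_once(evaluator, iterator):
    # Run one full pass of the evaluation engine and return its state,
    # which carries the computed metrics.
    evaluator.run(iterator, max_epochs=1)
    return evaluator.state


def log_results(logger, prefix, state, step):
    # Write every metric in the engine state under a common tag prefix.
    for name, value in state.metrics.items():
        logger.add_scalar('{}/{}'.format(prefix, name), value, step)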
def check_dataloader(self, **kwargs):
    args = self._default_args(**kwargs)
    iters, vocab = get_iterator(args)
    for batch in iters['train']:
        import ipdb
        ipdb.set_trace()  # XXX DEBUG
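# A non-interactive alternative to the ipdb breakpoint above: print the field
# names and shapes of a single training batch, then stop. This sketch assumes
# batches are dict-like, which may not match the project's actual batch format.
def peek_batch(self, **kwargs):
    args = self._default_args(**kwargs)
    iters, vocab = get_iterator(args)
    batch = next(iter(iters['train']))
    for name, tensor in batch.items():
        print(name, getattr(tensor, 'shape', type(tensor)))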
def get_model_ckpt(args):
    ckpt_available = args.ckpt_name is not None
    vocab = None
    if ckpt_available:
        name = '{}'.format(args.ckpt_name)
        name = '{}*'.format(name) if not name.endswith('*') else name
        ckpt_paths = sorted(args.ckpt_path.glob(name), reverse=False)
        assert len(ckpt_paths) > 0, "no ckpt candidate for {}".format(
            args.ckpt_path / args.ckpt_name)
        ckpt_path = ckpt_paths[0]  # monkey patch for choosing the best ckpt
        print("loading from {}".format(ckpt_path))
        dt = torch.load(ckpt_path)
        args.update(dt['args'])
        vocab = dt['vocab']
    iters, vocab = get_iterator(args, vocab)
    model = get_model(args, vocab)
    if ckpt_available:
        model.load_state_dict(dt['model'])
    return args, model, iters, vocab, ckpt_available
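# get_model_ckpt() expects each checkpoint file to be a dict with 'args',
# 'vocab', and 'model' keys. A sketch of the matching save-side routine under
# that assumption; the name save_ckpt is hypothetical.
import torch

def save_ckpt(args, model, vocab, ckpt_path):
    torch.save({
        'args': dict(args),           # restored via args.update()
        'vocab': vocab,               # reused to rebuild the iterators
        'model': model.state_dict(),  # consumed by load_state_dict()
    }, ckpt_path)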
def build(self, mode):
    if mode != tf.estimator.ModeKeys.TRAIN and mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("mode must be a key in tf.estimator.ModeKeys")
    # Create an index table to map a token string to an integer id.
    index_table = tf.contrib.lookup.index_table_from_file(
        param.vocab_file, num_oov_buckets=0, default_value=1)
    data_file = param.data_file
    # Get an iterator that iterates the dataset and batches data in the
    # expected format.
    self.it_train = get_iterator(data_file, index_table)
    # Encoder/decoder input ids and their respective sequence lengths.
    x, y_in, y_out, x_seq_length, y_seq_length = self.it_train.get_next()
    # x = tf.Print(x, [x, y_in, y_out, x_seq_length, y_seq_length])
    y_l = tf.shape(y_in)[1]  # minibatch length for decoder input ids

    # Build the encoder/decoder input layers: look up token embeddings and
    # add the position encoding.
    encoding = position_encoding_init(param.d_model, param.max_length)
    print(encoding)
    encoder_input = input_layer(encoding, x, param.vocab_size, x_seq_length,
                                param.d_model, param.keep_prob, "input")
    decoder_input = input_layer(encoding, y_in, param.vocab_size, y_seq_length,
                                param.d_model, param.keep_prob, "input",
                                reuse=True)  # reuse embedding

    # Encoder blocks: self-attention uses encoder_input as both queries and keys.
    for i in range(param.num_encoder_blocks):
        encoder_input = encoder_block(encoder_input, encoder_input,
                                      param.d_qkv, param.d_ff, param.num_heads,
                                      param.keep_prob, x_seq_length,
                                      x_seq_length, "encoder_block_%d" % i)
    encoder_output = encoder_input

    # Decoder blocks: self-attention uses decoder_input as queries and keys;
    # vanilla attention uses the encoder's output as keys.
    for i in range(param.num_decoder_blocks):
        decoder_input = decoder_block(decoder_input, encoder_output,
                                      decoder_input, param.d_qkv, param.d_ff,
                                      param.num_heads, param.keep_prob,
                                      x_seq_length, y_seq_length,
                                      "decoder_block_%d" % i)

    with tf.variable_scope("last_projection"):
        decoder_output = tf.layers.dense(decoder_input, param.vocab_size)

    with tf.variable_scope("loss"):
        mask = tf.sequence_mask(y_seq_length, y_l)
        mask = tf.Print(mask, [mask], summarize=1000)
        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = tf.one_hot(y_out, param.vocab_size)
            labels_smoothed = label_smoothing(labels)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels_smoothed, logits=decoder_output)
            # loss = tf.Print(loss, [loss, labels_smoothed], summarize=1000)
            loss = loss * tf.to_float(mask)  # batch, y_l
            self.loss = tf.reduce_sum(loss) / tf.to_float(
                tf.reduce_sum(y_seq_length))  # per-token loss
            tf.summary.scalar('loss', self.loss)
            self.global_step = tf.Variable(0, name='global_step',
                                           trainable=False)
            # Optimizer.
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=param.learning_rate * 10,
                beta1=0.9, beta2=0.98, epsilon=1e-8)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 10)
            self.train_op = self.optimizer.apply_gradients(zip(grads, tvars))
        elif mode == tf.estimator.ModeKeys.PREDICT:
            mask = tf.expand_dims(mask, 2)  # batch, y_l, 1
            # The last position along the length dimension is the predicted
            # token for the next position.
            self.pred = tf.argmax(decoder_output * tf.to_float(mask), -1)[:, -1]

    self.merged = tf.summary.merge_all()
    # Group the initialize ops.
    self.init_op = tf.group(self.it_train.initializer,
                            tf.global_variables_initializer(),
                            tf.tables_initializer())

def decode(self):
    # Sequential decoding is only used at inference time; during training
    # the model decodes in parallel.
    # Batched decoding should use a function like maybe_finished() that pads
    # finished samples with the padding value and keeps decoding the remaining
    # samples until all of them are finished or the max decode step is reached.
    pass
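# A minimal greedy-decoding sketch for the decode() stub above. It assumes the
# model exposes placeholder inputs model.x and model.y_in plus the argmax
# output model.pred -- an assumption that differs from the iterator-driven
# build() above; start_id, end_id, and max_decode_steps are likewise
# illustrative names, not the project's actual API.
import numpy as np

def greedy_decode(sess, model, x_batch, start_id, end_id, max_decode_steps):
    batch_size = x_batch.shape[0]
    y_in = np.full((batch_size, 1), start_id, dtype=np.int64)
    finished = np.zeros(batch_size, dtype=bool)
    for _ in range(max_decode_steps):
        # Re-run the decoder on everything generated so far and take the
        # predicted token for the next position.
        next_token = sess.run(model.pred,
                              feed_dict={model.x: x_batch, model.y_in: y_in})
        # Pad rows that have already finished, as maybe_finished() would.
        next_token = np.where(finished, end_id, next_token)
        y_in = np.concatenate([y_in, next_token[:, None]], axis=1)
        finished |= (next_token == end_id)
        if finished.all():
            break
    return y_in[:, 1:]  # drop the start token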
if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    entity_table, entity, entity_size = load_vocab(FLAGS.entity_vocab)
    relation_table, _, relation_size = load_vocab(FLAGS.relation_vocab)
    FLAGS.entity_size = entity_size
    FLAGS.relation_size = relation_size
    # Model Preparation
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(FLAGS.data_file,
                            entity,
                            entity_table,
                            relation_table,
                            FLAGS.batch_size,
                            shuffle_buffer_size=FLAGS.shuffle_buffer_size)
    if FLAGS.model_name.lower() == "transe":
        model = TransE(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "distmult":
        model = DISTMULT(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transh":
        model = TransH(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transr":
        model = TransR(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transd":
        model = TransD(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "stranse":
        model = STransE(iterator, FLAGS)
    else:
        raise ValueError("unknown model_name: {}".format(FLAGS.model_name))
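# The script above builds a model but its training loop is not shown. A
# minimal TF1 sketch under the assumption that the model exposes train_op and
# loss and that FLAGS.num_epochs exists; not the project's actual training code.
def train():
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
        for epoch in range(FLAGS.num_epochs):
            sess.run(iterator.initializer)
            total_loss, steps = 0.0, 0
            while True:
                try:
                    _, loss = sess.run([model.train_op, model.loss])
                    total_loss += loss
                    steps += 1
                except tf.errors.OutOfRangeError:
                    break  # one full pass over the data
            print("Epoch {}: mean loss {:.4f}".format(
                epoch + 1, total_loss / max(steps, 1)))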
        save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
        model.save(sess, save_path)
        print("Epoch {}, saved checkpoint to {}".format(epoch + 1, save_path))


if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size
    # Model Preparation
    padding = FLAGS.model_type == 1
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(
        FLAGS.train_file,
        vocab_table,
        FLAGS.batch_size,
        q_max_len=FLAGS.question_max_len,
        a_max_len=FLAGS.answer_max_len,
        num_buckets=FLAGS.num_buckets,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size,
        padding=padding,
    )
    if FLAGS.model_type == 1:
        model = AP_CNN(iterator, FLAGS, mode)
    else:
        model = AP_biLSTM(iterator, FLAGS, mode)
    train()
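# get_iterator() takes num_buckets, which suggests length-bucketed batching.
# One plausible shape for that, using tf.data's bucket_by_sequence_length;
# every name and parameter here is an assumption about the real implementation.
def bucketed_batches(dataset, batch_size, num_buckets, max_len):
    # Split [0, max_len) into even ranges so each batch only pads up to its
    # bucket's maximum length.
    bucket_width = (max_len + num_buckets - 1) // num_buckets
    boundaries = [bucket_width * (i + 1) for i in range(num_buckets - 1)]
    return dataset.apply(tf.data.experimental.bucket_by_sequence_length(
        element_length_func=lambda q, a, q_len, a_len: tf.maximum(q_len, a_len),
        bucket_boundaries=boundaries,
        bucket_batch_sizes=[batch_size] * num_buckets))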
        save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
        model.save(sess, save_path)
        print("Epoch {}, saved checkpoint to {}".format(epoch + 1, save_path))


if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size
    # Model Preparation
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(
        FLAGS.train_file,
        vocab_table,
        FLAGS.batch_size,
        s0_max_len=FLAGS.s0_max_len,
        s1_max_len=FLAGS.s1_max_len,
        num_buckets=FLAGS.num_buckets,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size,
        padding=True,
    )
    if FLAGS.model_name.lower() == "bcnn":
        model = BCNN(iterator, FLAGS, mode)
    else:
        model = ABCNN(iterator, FLAGS, mode, FLAGS.model_type)
    train()
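# model.save(sess, save_path) above suggests a thin wrapper around
# tf.train.Saver; a minimal sketch of such a wrapper, assumed rather than
# taken from the project.
class SaverMixin(object):
    def save(self, sess, save_path):
        # Lazily create a single Saver and reuse it across epochs.
        if not hasattr(self, '_saver'):
            self._saver = tf.train.Saver(max_to_keep=5)
        return self._saver.save(sess, save_path)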
            # save model
            if FLAGS.savemodel and (epoch + 1) % FLAGS.save_model_per_epochs == 0:
                model_name = "model_{}_{}".format(
                    epoch + 1,
                    time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())))
                ckpt_path = os.path.join(FLAGS.checkpointDir, model_name)
                model.savedmodel(sess, signature, ckpt_path)
                print("Export SavedModel with acc={} to {}".format(
                    acc, ckpt_path))


if __name__ == '__main__':
    # tf.set_min_vlog_level(1)
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size
    # Model Preparation
    data_file_placeholder = tf.placeholder(tf.string, [])
    mode = tf.placeholder(tf.string, [])
    iterator = get_iterator(data_file_placeholder,
                            vocab_table,
                            FLAGS.batch_size,
                            question_max_len=FLAGS.question_max_len,
                            answer_max_len=FLAGS.answer_max_len,
                            shuffle_buffer_size=FLAGS.num_samples)
    model = parse_model(iterator, FLAGS, mode)
    train()
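# model.savedmodel(sess, signature, ckpt_path) above suggests a wrapper around
# TF1's SavedModelBuilder; a minimal sketch under that assumption, where
# signature is a prebuilt SignatureDef.
def savedmodel(self, sess, signature, export_dir):
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature,
        })
    builder.save()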