def create_train_and_evaluate(pipeline_proto):
  """Trains and evaluates the model configured by `pipeline_proto`.

  Note: despite the `create_` prefix, this runs
  tf.estimator.train_and_evaluate directly and returns nothing.

  Args:
    pipeline_proto: an instance of pipeline_pb2.Pipeline.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  # Create train_spec.
  train_config = pipeline_proto.train_config
  train_input_fn = reader.get_input_fn(pipeline_proto.train_reader)
  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                      max_steps=train_config.max_steps)

  # Create eval_spec.
  eval_config = pipeline_proto.eval_config
  eval_input_fn = reader.get_input_fn(pipeline_proto.eval_reader)
  # eval_hooks = [
  #     EvalSummarySaverHook(output_dir=pipeline_proto.model_dir + '/eval')
  # ]
  eval_hooks = None
  eval_spec = tf.estimator.EvalSpec(
      input_fn=eval_input_fn,
      steps=eval_config.steps,
      hooks=eval_hooks,
      start_delay_secs=eval_config.start_delay_secs,
      throttle_secs=eval_config.throttle_secs)

  # Set session config.
  session_config = tf.ConfigProto()
  session_config.allow_soft_placement = True
  session_config.gpu_options.allow_growth = True

  # Create estimator.
  run_config = tf.estimator.RunConfig(
      save_summary_steps=train_config.save_summary_steps,
      save_checkpoints_steps=train_config.save_checkpoints_steps,
      session_config=session_config,
      keep_checkpoint_max=train_config.keep_checkpoint_max,
      log_step_count_steps=train_config.log_step_count_steps)

  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=pipeline_proto.model_dir,
                                     config=run_config)

  # Train and evaluate.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def train_and_evaluate(pipeline_proto, model_dir, use_mirrored_strategy=False):
  """Starts the estimator trainval loop.

  Args:
    pipeline_proto: An instance of pipeline_pb2.Pipeline.
    model_dir: Path to the directory saving checkpoint files.
    use_mirrored_strategy: If true, use tf.contrib.distribute.MirroredStrategy
      to distribute training across GPUs.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  # Create train_spec.
  train_config = pipeline_proto.train_config
  train_input_fn = reader.get_input_fn(pipeline_proto.train_reader,
                                       is_training=True)
  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                      max_steps=train_config.max_steps)

  # Keep copies of the checkpoints achieving the best validation accuracy.
  exporter = BestCheckpointCopier(name='ckpts',
                                  checkpoints_to_keep=4,
                                  score_metric='metrics/accuracy',
                                  compare_fn=lambda x, y: x.score > y.score)

  # Create eval_spec.
  eval_config = pipeline_proto.eval_config
  eval_input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                      is_training=False)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=eval_input_fn,
      steps=eval_config.steps,
      start_delay_secs=eval_config.start_delay_secs,
      throttle_secs=eval_config.throttle_secs,
      exporters=[exporter])

  # Create run_config.
  strategy = None
  if use_mirrored_strategy:
    strategy = tf.contrib.distribute.MirroredStrategy()
  run_config = tf.estimator.RunConfig(
      train_distribute=strategy,
      session_config=tf.compat.v1.ConfigProto(
          allow_soft_placement=True,
          gpu_options=tf.compat.v1.GPUOptions(
              allow_growth=True, per_process_gpu_memory_fraction=1.0)),
      save_summary_steps=train_config.save_summary_steps,
      save_checkpoints_steps=train_config.save_checkpoints_steps,
      keep_checkpoint_max=train_config.keep_checkpoint_max,
      log_step_count_steps=train_config.log_step_count_steps)

  # Train and evaluate.
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=model_dir,
                                     config=run_config)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
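# Usage sketch (illustrative, not part of the original code): assuming the
# Pipeline config is stored as a text-format protobuf file, a caller could
# load it and start the trainval loop as follows. The helper name
# `_example_train_entrypoint` is hypothetical; `text_format.Merge` and
# `tf.io.gfile.GFile` are standard protobuf/TensorFlow APIs.
def _example_train_entrypoint(pipeline_config_path, model_dir,
                              use_mirrored_strategy=False):
  """Example only: loads a text-format Pipeline proto and starts training."""
  from google.protobuf import text_format

  # Parse the text-format config into a pipeline_pb2.Pipeline message.
  with tf.io.gfile.GFile(pipeline_config_path, 'r') as fp:
    pipeline_proto = text_format.Merge(fp.read(), pipeline_pb2.Pipeline())
  train_and_evaluate(pipeline_proto, model_dir,
                     use_mirrored_strategy=use_mirrored_strategy)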
def _test(pipeline_proto, model_dir, testing_res_file):
  """Evaluates the best checkpoint on the test set.

  Args:
    pipeline_proto: An instance of pipeline_pb2.Pipeline.
    model_dir: Path to the directory saving checkpoint files.
    testing_res_file: Path to the output result file.
  """
  # Create the test input_fn.
  eval_input_fn = reader.get_input_fn(pipeline_proto.test_reader,
                                      is_training=False)

  run_config = tf.estimator.RunConfig(
      session_config=tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=tf.GPUOptions(
                                        allow_growth=True)))

  # Create the estimator.
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(
      model_fn=model_fn,
      model_dir=None,  # This is the dir to write summaries.
      config=run_config)

  # Find the checkpoint with the largest step number in the `ckpts` directory
  # populated by the BestCheckpointCopier exporter.
  checkpoint_dir = os.path.join(model_dir, 'ckpts')
  checkpoint_number = 0
  for file_name in os.listdir(checkpoint_dir):
    m = re.match(r'model\.ckpt-(\d+)\.meta', file_name)
    if m:
      logging.info('Found checkpoint %s.', '.'.join(file_name.split('.')[:-1]))
      checkpoint_number = max(int(m.group(1)), checkpoint_number)

  if checkpoint_number > 0:
    testing_result_csv_file = os.path.join(checkpoint_dir, testing_res_file)

    # Do not re-evaluate if a previous testing result is found.
    if os.path.isfile(testing_result_csv_file):
      logging.info('Found previous testing results %s.',
                   testing_result_csv_file)
    # Evaluate the best checkpoint on the test set.
    else:
      checkpoint_path = os.path.join(checkpoint_dir,
                                     'model.ckpt-%d' % checkpoint_number)
      logging.info('Found the best checkpoint %s.', checkpoint_path)

      metrics = estimator.evaluate(eval_input_fn,
                                   checkpoint_path=checkpoint_path,
                                   steps=40000)
      keys = [
          key for key in sorted(metrics.keys()) if key.startswith('metrics')
      ]
      with open(testing_result_csv_file, 'w') as f:
        f.write(','.join(keys) + '\n')
        f.write(','.join(['%.4lf' % metrics[key] for key in keys]) + '\n')
      logging.info('Testing results are written to %s.',
                   testing_result_csv_file)
def _evaluate(pipeline_proto, model_dir):
  """Starts an evaluation.

  Args:
    pipeline_proto: An instance of pipeline_pb2.Pipeline.
    model_dir: Path to the directory saving checkpoint files.
  """
  # Create the eval input_fn.
  eval_config = pipeline_proto.eval_config
  eval_input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                      is_training=False)

  run_config = tf.estimator.RunConfig(
      session_config=tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=tf.GPUOptions(
                                        allow_growth=True)))

  checkpoint_path = tf.train.latest_checkpoint(model_dir)

  # Evaluate.
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=None,
                                     config=run_config)
  estimator.evaluate(eval_input_fn,
                     checkpoint_path=checkpoint_path,
                     steps=eval_config.steps)
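# Sketch of a continuous-evaluation variant (illustrative, not part of the
# original code): instead of evaluating only the latest checkpoint once,
# `tf.train.checkpoints_iterator` can block until new checkpoints appear in
# `model_dir` and evaluate each one. The function name below is hypothetical;
# the estimator construction mirrors `_evaluate` above.
def _example_continuous_evaluate(pipeline_proto, model_dir, timeout_secs=3600):
  """Example only: evaluates every new checkpoint written to `model_dir`."""
  eval_config = pipeline_proto.eval_config
  eval_input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                      is_training=False)
  run_config = tf.estimator.RunConfig(
      session_config=tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=tf.GPUOptions(
                                        allow_growth=True)))
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=None,
                                     config=run_config)

  # Blocks until a new checkpoint is found; stops after `timeout_secs` of
  # inactivity.
  for checkpoint_path in tf.train.checkpoints_iterator(model_dir,
                                                       timeout=timeout_secs):
    metrics = estimator.evaluate(eval_input_fn,
                                 checkpoint_path=checkpoint_path,
                                 steps=eval_config.steps)
    logging.info('Evaluated %s: %s', checkpoint_path, metrics)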
def predict(pipeline_proto, checkpoint_path=None, yield_single_examples=False):
  """Generates prediction results.

  Args:
    pipeline_proto: an instance of pipeline_pb2.Pipeline.
    checkpoint_path: Path to a specific checkpoint to restore. If None, the
      latest checkpoint in pipeline_proto.model_dir is used.
    yield_single_examples: If true, yield single examples.

  Yields:
    example: The prediction result.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  predict_input_fn = reader.get_input_fn(pipeline_proto.eval_reader)

  # Create estimator.
  model_fn = _create_model_fn(pipeline_proto)
  session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
  run_config = tf.estimator.RunConfig(session_config=session_config)
  estimator = tf.estimator.Estimator(
      model_fn=model_fn,
      model_dir=pipeline_proto.model_dir,
      config=run_config)

  # Predict results.
  for example in estimator.predict(
      input_fn=predict_input_fn,
      checkpoint_path=checkpoint_path,
      yield_single_examples=yield_single_examples):
    yield example
def train_and_evaluate(pipeline_proto, model_dir):
  """Starts the estimator trainval loop.

  Args:
    pipeline_proto: An instance of pipeline_pb2.Pipeline.
    model_dir: Path to the directory saving checkpoint files.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  # Create train_spec.
  train_config = pipeline_proto.train_config
  train_input_fn = reader.get_input_fn(pipeline_proto.train_reader,
                                       is_training=True)
  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                      max_steps=train_config.max_steps)

  # Create eval_spec.
  eval_config = pipeline_proto.eval_config
  eval_input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                      is_training=False)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=eval_input_fn,
      steps=eval_config.steps,
      start_delay_secs=eval_config.start_delay_secs,
      throttle_secs=eval_config.throttle_secs)

  # Create run_config.
  run_config = tf.estimator.RunConfig(
      save_summary_steps=train_config.save_summary_steps,
      save_checkpoints_steps=train_config.save_checkpoints_steps,
      keep_checkpoint_max=train_config.keep_checkpoint_max,
      log_step_count_steps=train_config.log_step_count_steps)

  # Train and evaluate.
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=model_dir,
                                     config=run_config)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def predict(pipeline_proto,
            model_dir=None,
            yield_single_examples=False,
            params=None):
  """Generates inference results.

  Args:
    pipeline_proto: A pipeline_pb2.Pipeline proto.
    model_dir: Path to the directory saving model checkpoints.
    yield_single_examples: If true, yield a single example.
    params: Additional parameters to be passed to tf.Estimator.

  Yields:
    example: inference results.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  predict_input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                         is_training=False)

  # Create estimator.
  model_fn = _create_model_fn(pipeline_proto)
  run_config = tf.estimator.RunConfig(
      session_config=tf.compat.v1.ConfigProto(
          gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)))
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=model_dir,
                                     config=run_config,
                                     params=params)

  # Predict results from the latest checkpoint.
  checkpoint_path = tf.train.latest_checkpoint(model_dir)
  assert checkpoint_path is not None

  logging.info('Loading checkpoint %s...', checkpoint_path)
  for example in estimator.predict(
      input_fn=predict_input_fn,
      checkpoint_path=checkpoint_path,
      yield_single_examples=yield_single_examples):
    yield example
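# Usage sketch (illustrative, not part of the original code): the `predict`
# generator above can be consumed one example at a time and dumped to a JSON
# Lines file. The function name is hypothetical, and it assumes the prediction
# dict contains numeric numpy values that serialize cleanly.
def _example_dump_predictions(pipeline_proto, model_dir, output_jsonl_file):
  """Example only: writes per-example predictions to a .jsonl file."""
  import json

  import numpy as np

  with open(output_jsonl_file, 'w') as fp:
    for i, example in enumerate(
        predict(pipeline_proto, model_dir, yield_single_examples=True)):
      # Convert numpy values to plain Python types before serialization.
      record = {
          key: np.asarray(value).tolist() for key, value in example.items()
      }
      fp.write(json.dumps(record) + '\n')
      if (i + 1) % 1000 == 0:
        logging.info('Predicted %i examples.', i + 1)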
def train(pipeline_proto, model_dir, use_mirrored_strategy=False):
  """Starts the estimator training loop.

  Args:
    pipeline_proto: An instance of pipeline_pb2.Pipeline.
    model_dir: Path to the directory saving checkpoint files.
    use_mirrored_strategy: If true, use tf.contrib.distribute.MirroredStrategy
      to distribute training across GPUs.
  """
  if not isinstance(pipeline_proto, pipeline_pb2.Pipeline):
    raise ValueError('pipeline_proto has to be an instance of Pipeline.')

  # Create the train input_fn.
  train_config = pipeline_proto.train_config
  train_input_fn = reader.get_input_fn(pipeline_proto.train_reader,
                                       is_training=True)

  # Create run_config.
  strategy = None
  if use_mirrored_strategy:
    strategy = tf.contrib.distribute.MirroredStrategy()
  run_config = tf.estimator.RunConfig(
      train_distribute=strategy,
      session_config=tf.ConfigProto(
          allow_soft_placement=True,
          gpu_options=tf.GPUOptions(allow_growth=True)),
      save_summary_steps=train_config.save_summary_steps,
      save_checkpoints_steps=train_config.save_checkpoints_steps,
      keep_checkpoint_max=train_config.keep_checkpoint_max,
      log_step_count_steps=train_config.log_step_count_steps)

  # Train.
  model_fn = _create_model_fn(pipeline_proto, is_chief=run_config.is_chief)
  estimator = tf.estimator.Estimator(model_fn=model_fn,
                                     model_dir=model_dir,
                                     config=run_config)
  estimator.train(train_input_fn, max_steps=train_config.max_steps)
def main(_):
  logging.set_verbosity(logging.DEBUG)

  for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

  pipeline_proto = _load_pipeline_proto(FLAGS.pipeline_proto)
  vocab = _load_vocab_file(FLAGS.vocab_file)

  # Get the `next_examples_ts' tensor.
  if 'train' in FLAGS.output_jsonl_file:
    input_fn = reader.get_input_fn(pipeline_proto.train_reader,
                                   is_training=False)
  else:
    input_fn = reader.get_input_fn(pipeline_proto.eval_reader,
                                   is_training=False)

  iterator = input_fn().make_initializable_iterator()
  next_examples_ts = iterator.get_next()

  # Build the model that takes placeholders as inputs and predicts the logits.
  frcnn_dims = pipeline_proto.eval_reader.vcr_text_frcnn_reader.frcnn_feature_dims
  (label_pl, choices_pl, choices_tag_pl,
   choices_len_pl) = (tf.placeholder(tf.int32, [1]),
                      tf.placeholder(tf.int32, [1, NUM_CHOICES, None]),
                      tf.placeholder(tf.int32, [1, NUM_CHOICES, None]),
                      tf.placeholder(tf.int32, [1, NUM_CHOICES]))
  (num_detections_pl, detection_boxes_pl, detection_classes_pl,
   detection_scores_pl,
   detection_features_pl) = (tf.placeholder(tf.int32, [1]),
                             tf.placeholder(tf.float32, [1, None, 4]),
                             tf.placeholder(tf.int32, [1, None]),
                             tf.placeholder(tf.float32, [1, None]),
                             tf.placeholder(tf.float32, [1, None, frcnn_dims]))

  model = builder.build(pipeline_proto.model, is_training=False)
  logits_ts = model.predict({
      InputFields.num_detections: num_detections_pl,
      InputFields.detection_boxes: detection_boxes_pl,
      InputFields.detection_classes: detection_classes_pl,
      InputFields.detection_scores: detection_scores_pl,
      InputFields.detection_features: detection_features_pl,
      model._field_choices: choices_pl,
      model._field_choices_tag: choices_tag_pl,
      model._field_choices_len: choices_len_pl,
  })[FIELD_ANSWER_PREDICTION]

  losses_ts = tf.nn.sigmoid_cross_entropy_with_logits(
      logits=logits_ts, labels=tf.one_hot(label_pl, depth=NUM_CHOICES))

  saver = tf.train.Saver()

  # Find the latest checkpoint file.
  ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
  assert ckpt_path is not None

  def _calc_score_and_loss(choices, choices_tag, choices_len, label,
                           num_detections, detection_boxes, detection_classes,
                           detection_scores, detection_features):
    """Gets the VCR matching scores and losses."""
    # Note: `sess` is bound later, inside the tf.Session block below.
    (scores, losses) = sess.run(
        [logits_ts, losses_ts],
        feed_dict={
            label_pl: np.expand_dims(label, 0),
            choices_pl: np.expand_dims(choices, 0),
            choices_tag_pl: np.expand_dims(choices_tag, 0),
            choices_len_pl: np.expand_dims(choices_len, 0),
            num_detections_pl: np.expand_dims(num_detections, 0),
            detection_boxes_pl: np.expand_dims(detection_boxes, 0),
            detection_classes_pl: np.expand_dims(detection_classes, 0),
            detection_scores_pl: np.expand_dims(detection_scores, 0),
            detection_features_pl: np.expand_dims(detection_features, 0),
        })
    return scores[0], losses[0]

  # Run inference using the pretrained Bert model.
  with tf.Session() as sess, open(FLAGS.output_jsonl_file, 'w') as output_fp:
    sess.run(iterator.initializer)
    sess.run(tf.tables_initializer())
    saver.restore(sess, ckpt_path)
    logging.info('Restore from %s.', ckpt_path)

    batch_id = 0
    while True:
      batch_id += 1
      try:
        inputs_batched = sess.run(next_examples_ts)
        batch_size = len(inputs_batched[InputFields.annot_id])

        masks = np.array([[MASK_ID], [MASK_ID], [MASK_ID], [MASK_ID]])
        ones = np.array([[1], [1], [1], [1]])

        for example_id in range(batch_size):
          (annot_id, choices, choices_tag, choices_len, label) = (
              inputs_batched[InputFields.annot_id][example_id].decode('utf8'),
              inputs_batched[model._field_choices][example_id],
              inputs_batched[model._field_choices_tag][example_id],
              inputs_batched[model._field_choices_len][example_id],
              inputs_batched[model._field_label][example_id])

          (num_detections, detection_boxes, detection_classes,
           detection_scores, detection_features) = (
               inputs_batched[InputFields.num_detections][example_id],
               inputs_batched[InputFields.detection_boxes][example_id],
               inputs_batched[InputFields.detection_classes][example_id],
               inputs_batched[InputFields.detection_scores][example_id],
               inputs_batched[InputFields.detection_features][example_id])

          # Scores of the original choices.
          orig_scores, orig_losses = _calc_score_and_loss(
              choices, choices_tag, choices_len, label, num_detections,
              detection_boxes, detection_classes, detection_scores,
              detection_features)

          # Adversarial attacking.
          max_losses = np.zeros(NUM_CHOICES)
          max_losses_choices = choices

          if FLAGS.rationale:
            sep_pos = np.where(choices == SEP_ID)[1].take([1, 3, 5, 7])
          else:
            sep_pos = np.where(choices == SEP_ID)[1]

          result_losses = [[] for _ in range(4)]
          result_tokens = [[] for _ in range(4)]
          for pos_id in range(sep_pos.min() + 1, choices_len.max()):
            # Compute the new losses with the token at `pos_id` masked out.
            new_choices = np.concatenate(
                [choices[:, :pos_id], masks, choices[:, pos_id + 1:]], -1)
            new_choices_tag = np.concatenate(
                [choices_tag[:, :pos_id], -ones, choices_tag[:, pos_id + 1:]],
                -1)
            scores, losses = _calc_score_and_loss(
                new_choices, new_choices_tag, choices_len, label,
                num_detections, detection_boxes, detection_classes,
                detection_scores, detection_features)

            # Update the maximum values.
            token_id = choices[:, pos_id]
            is_valid = np.logical_not(
                np.logical_or(
                    token_id == PAD_ID,
                    np.logical_or(token_id == CLS_ID, token_id == SEP_ID)))

            for choice_id in range(4):
              if is_valid[choice_id]:
                result_losses[choice_id].append(
                    round(float(losses[choice_id]), 4))
                result_tokens[choice_id].append(
                    vocab[choices[choice_id][pos_id]])

            # Maximize loss.
            adversarial_select_cond = np.logical_and(losses > max_losses,
                                                     is_valid)
            max_losses_choices = np.where(
                np.expand_dims(adversarial_select_cond, -1), new_choices,
                max_losses_choices)
            max_losses = np.maximum(max_losses, losses)
          # END: for pos_id in range(sep_pos.min() + 1, choices_len.max()):

          choices = pack_tensor_values(choices, choices_len, vocab)
          adversarial_choices = pack_tensor_values(max_losses_choices,
                                                   choices_len, vocab)

          output_annot = {
              'annot_id': annot_id,
              'label': int(label),
              'choices': choices,
              'adversarial_choices': adversarial_choices,
              'result_losses': result_losses,
              'result_tokens': result_tokens,
          }
          # print(label)
          # for i in range(4):
          #   print(choices[i])
          #   print(adversarial_choices[i])
          output_fp.write(json.dumps(output_annot) + '\n')

        if batch_id % 10 == 0:
          logging.info('batch_id=%i', batch_id)
      except tf.errors.OutOfRangeError:
        logging.info('Done!')
        break
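# Usage sketch (illustrative, not part of the original code): each line of the
# .jsonl file written above is a standalone JSON record with the keys
# 'annot_id', 'label', 'choices', 'adversarial_choices', 'result_losses' and
# 'result_tokens'. A downstream consumer could read it as follows; the
# function name is hypothetical.
def _example_read_adversarial_outputs(output_jsonl_file):
  """Example only: yields (annot_id, original, adversarial) choice tuples."""
  import json

  with open(output_jsonl_file, 'r') as fp:
    for line in fp:
      annot = json.loads(line)
      yield annot['annot_id'], annot['choices'], annot['adversarial_choices']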