Example #1
0
def experiment_fn(run_config, params):
    """Assemble a tf.contrib.learn Experiment for seq2seq training/eval."""
    model = Model()
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    source_vocab = data_loader.load_vocab("source_vocab")
    target_vocab = data_loader.load_vocab("target_vocab")

    # Publish vocab-derived properties; model_fn reads them lazily via Config.
    Config.data.rev_source_vocab = utils.get_rev_vocab(source_vocab)
    Config.data.rev_target_vocab = utils.get_rev_vocab(target_vocab)
    Config.data.source_vocab_size = len(source_vocab)
    Config.data.target_vocab_size = len(target_vocab)

    train_data, test_data = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = data_loader.get_dataset_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.get_dataset_batch(
        test_data, batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        # Periodically decode sample encoder inputs / targets / predictions
        # back to tokens for eyeballing training progress.
        train_hooks.append(hook.print_variables(
            variables=['train/enc_0'],
            rev_vocab=utils.get_rev_vocab(source_vocab),
            every_n_iter=Config.train.check_hook_n_iter))
        train_hooks.append(hook.print_variables(
            variables=['train/target_0', 'train/pred_0'],
            rev_vocab=utils.get_rev_vocab(target_vocab),
            every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
Example #2
0
def experiment_fn(run_config, params):
    """Build a tf.contrib.learn Experiment for the QA task.

    Args:
        run_config: tf.estimator.RunConfig forwarded to the Estimator.
        params: hyper-parameter object forwarded to model_fn.

    Returns:
        A configured tf.contrib.learn.Experiment.
    """
    model = Model()
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    # NOTE(review): task_test_id reuses task_id — confirm a separate
    # Config.data.task_test_id wasn't intended here.
    data_loader = DataLoader(task_path=Config.data.task_path,
                             task_id=Config.data.task_id,
                             task_test_id=Config.data.task_id,
                             w2v_dim=Config.model.embed_dim,
                             use_pretrained=Config.model.use_pretrained)

    data = data_loader.make_train_and_test_set()

    vocab = data_loader.vocab

    # Publish data-derived properties so the lazily-built model can use them.
    Config.data.vocab_size = len(vocab)
    Config.data.max_facts_seq_len = data_loader.max_facts_seq_len
    Config.data.max_fact_count = data_loader.max_fact_count
    Config.data.max_question_seq_len = data_loader.max_question_seq_len
    print("max_facts_seq_len:", data_loader.max_facts_seq_len)
    print("max_fact_count:", data_loader.max_fact_count)
    print("max_question_seq_len:", data_loader.max_question_seq_len)

    train_input_fn, train_input_hook = data_loader.make_batch(
        data["train"], batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.make_batch(
        data["test"], batch_size=Config.model.batch_size, scope="test")

    # FIX: removed a dead `if Config.train.print_verbose: pass` branch that
    # did nothing.
    train_hooks = [train_input_hook]
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
    return experiment
Example #3
0
def create_experiment(output_dir, data_dir, model_name, train_steps,
                      eval_steps):
  """Create an Experiment wired to the T2T estimator components."""
  hparams = create_hparams(FLAGS.hparams_set, data_dir)
  estimator, input_fns = create_experiment_components(
      hparams=hparams,
      output_dir=output_dir,
      data_dir=data_dir,
      model_name=model_name)
  problem_names = FLAGS.problems.split("-")
  eval_metrics = metrics.create_evaluation_metrics(
      zip(problem_names, hparams.problem_instances))
  # When autotuning, the tuning objective must be one of the eval metrics.
  autotuning = hasattr(FLAGS, "autotune") and FLAGS.autotune
  if autotuning and FLAGS.objective not in eval_metrics:
    raise ValueError("Tuning objective %s not among evaluation metrics %s" %
                     (FLAGS.objective, eval_metrics.keys()))
  train_monitors = []
  eval_hooks = []
  if FLAGS.tfdbg:
    # One shared tfdbg hook instance serves both training and evaluation.
    cli_hook = debug.LocalCLIDebugHook()
    train_monitors.append(cli_hook)
    eval_hooks.append(cli_hook)
  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=input_fns[tf.contrib.learn.ModeKeys.TRAIN],
      eval_input_fn=input_fns[tf.contrib.learn.ModeKeys.EVAL],
      eval_metrics=eval_metrics,
      train_steps=train_steps,
      eval_steps=eval_steps,
      min_eval_frequency=FLAGS.local_eval_frequency,
      train_monitors=train_monitors,
      eval_hooks=eval_hooks)
Example #4
0
def train(args):
    """Run tf.estimator train-and-evaluate with the given options.

    Args:
        args: dict of run options. Keys used here: "use_comet",
            "use_tf_debug", and "max_steps" (multiplied by 1000 below,
            or None for unbounded training).
    """
    # Persist the arguments so successful runs can be reproduced later.
    save_args(args)

    if args["use_comet"]:
        # SECURITY NOTE(review): the Comet API key is hard-coded in source;
        # move it to an environment variable before sharing this code.
        experiment = Experiment(api_key="bRptcjkrwOuba29GcyiNaGDbj",
                                project_name="macgraph")
        experiment.log_multiple_params(args)

    estimator = get_estimator(args)

    hooks = [tf_debug.LocalCLIDebugHook()] if args["use_tf_debug"] else []

    # FIX: the original spread `a * 1000 if cond else None` across lines in a
    # way that obscured its (correct) precedence; named and parenthesized.
    max_steps = (args["max_steps"] * 1000
                 if args["max_steps"] is not None else None)
    train_spec = tf.estimator.TrainSpec(
        input_fn=gen_input_fn(args, "train"),
        max_steps=max_steps,
        hooks=hooks)

    eval_spec = tf.estimator.EvalSpec(input_fn=gen_input_fn(args, "eval"),
                                      throttle_secs=300)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #5
0
def run(mode, run_config, params):
    """Train the style-transfer estimator; only 'train' mode is supported."""
    model = Model()
    # ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from='logs/pretrained/vgg_16.ckpt',vars_to_warm_start='vgg_16.*')
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       # warm_start_from=ws,
                                       config=run_config)

    hooks = [tf_debug.LocalCLIDebugHook()] if Config.train.debug else []

    # Log the loss components by their graph tensor names every N iterations.
    loss_hooks = tf.train.LoggingTensorHook({'total_loss': 'loss/add_7',
                                             'content_loss': 'loss/mul_1',
                                             'style_loss': 'loss/mul_6:0',
                                             'step': 'global_step:0'}, every_n_iter=Config.train.check_hook_n_iter)

    # Guard clause: anything but 'train' is rejected.
    if mode != 'train':
        raise ValueError('no %s mode' % (mode))

    train_data = data_loader.get_tfrecord(mode, shuffle=True)
    train_input_fn, train_input_hook = data_loader.get_dataset_batch(
        train_data,
        buffer_size=1000,
        batch_size=Config.model.batch_size,
        scope="train")
    hooks.extend([train_input_hook, loss_hooks])
    estimator.train(input_fn=train_input_fn, hooks=hooks,
                    max_steps=Config.train.max_steps)
Example #6
0
def main(args):
    """Entry point: configure gin, build the SC2 env + A2C agent, and train.

    Side effects: parses gin config files (resuming a saved operative config
    if `output_dir` already exists) and runs a MonitoredTrainingSession until
    a hook requests a stop.
    """
    run_name = FLAGS.run_name or time.strftime('%Y%m%d-%H%M%S',
                                               time.localtime())
    output_dir = path.join(FLAGS.run_dir, run_name)

    gin.bind_parameter('SC2EnvironmentConfig.map_name', FLAGS.map)

    gin_files = []
    if path.exists(output_dir):
        # Resume: replay the operative config written by GinConfigSaverHook.
        print('Resuming', output_dir)
        gin_files.append(path.join(output_dir, 'operative_config-0.gin'))

    if FLAGS.gin_file:
        gin_files += FLAGS.gin_file

    gin.parse_config_files_and_bindings(gin_files,
                                        FLAGS.gin_param,
                                        finalize_config=True)

    env = VecEnv(SC2Environment, SC2EnvironmentConfig())
    try:
        agent = A2CAgent(env.spec,
                         callbacks=RewardSummaryHook(
                             summary_output_dir=output_dir,
                             write_summaries_secs=30))
        runner = Runner(env, agent)

        print_parameter_summary()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = FLAGS.gpu_memory_allow_growth
        if FLAGS.gpu_memory_fraction:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction

        hooks = [gin.tf.GinConfigSaverHook(output_dir)]
        if FLAGS.step_limit:
            hooks.append(tf.train.StopAtStepHook(last_step=FLAGS.step_limit))
            hooks.append(LogProgressHook(FLAGS.step_limit))
        if FLAGS.profile:
            hooks.append(
                tf.train.ProfilerHook(save_secs=60, output_dir=output_dir))
        if FLAGS.debug:
            hooks.append(tf_debug.LocalCLIDebugHook())
        else:
            # NaN guard on the loss only when not running the CLI debugger.
            hooks.append(tf.train.NanTensorHook(agent.loss))
        with tf.train.MonitoredTrainingSession(
                config=config,
                hooks=hooks,
                checkpoint_dir=output_dir,
                save_summaries_secs=30,
                save_checkpoint_secs=FLAGS.save_checkpoint_secs,
                save_checkpoint_steps=FLAGS.save_checkpoint_steps) as sess:
            while not sess.should_stop():

                def step_fn(step_context):
                    # 512 is the per-call step budget passed to the runner —
                    # presumably environment steps; confirm in Runner.train.
                    runner.train(step_context, 512)

                sess.run_step_fn(step_fn)
    finally:
        # Always release the SC2 environment processes, even on error.
        env.close()
Example #7
0
def run(mode, run_config, params):
    """Build the estimator and train until Config.train.max_steps."""
    model = Model()
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    # NOTE(review): `mode` is accepted but never read in this function.
    hooks = [tf_debug.LocalCLIDebugHook()] if Config.train.debug else []

    # Log total loss and global step by their graph tensor names.
    loss_hooks = tf.train.LoggingTensorHook(
        {
            'loss': 'loss/total_loss:0',
            'step': 'global_step:0'
        },
        every_n_iter=Config.train.check_hook_n_iter)

    train_data = data_loader.get_tfrecord(shuffle=True)

    train_input_fn, train_input_hook = data_loader.get_dataset_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")

    hooks += [train_input_hook, loss_hooks]
    estimator.train(input_fn=train_input_fn,
                    hooks=hooks,
                    max_steps=Config.train.max_steps)
Example #8
0
def _create_hooks(mparams, output_dir):
    """Assemble hook lists for training and evaluation.

    Returns:
        (train_hooks, eval_hooks) — lists of SessionRunHook-style objects.
    """
    train_hooks = []

    # Evaluation writes (mis)prediction dumps into FLAGS.output_dir.
    eval_hooks = [
        Prediction(mparams, FLAGS.output_dir),
        FalsePrediction(mparams, FLAGS.output_dir),
    ]

    if FLAGS.schedule == 'continuous_eval':
        eval_output_dir = os.path.join(output_dir, 'eval_continuous')
        eval_hooks.append(
            tf.contrib.training.SummaryAtEndHook(eval_output_dir))
    elif FLAGS.schedule == 'evaluate':
        # One-shot evaluation: run until the input data are exhausted.
        FLAGS.eval_steps = None

    if FLAGS.debug:
        from tensorflow.python import debug as tf_debug
        # The same tfdbg instance is shared by training and evaluation.
        debug_hook = tf_debug.LocalCLIDebugHook()
        train_hooks.append(debug_hook)
        eval_hooks.append(debug_hook)
    return train_hooks, eval_hooks
Example #9
0
def my_model(features, labels, mode, params):
  """Model function: build the Model and return the mode's EstimatorSpec."""
  word_embed = np.load(embed_file)

  training = mode == tf.estimator.ModeKeys.TRAIN
  m = Model(params, word_embed, features, labels, training)

  # Evaluation metrics, also summarized for TensorBoard.
  metrics = {'accuracy': m.acc}
  tf.summary.scalar('accuracy', m.acc[1])

  f1_hook = F1Hook(m.pred, labels)
  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(
        mode, loss=m.loss, eval_metric_ops=metrics, evaluation_hooks=[f1_hook])

  # Only TRAIN remains at this point.
  assert mode == tf.estimator.ModeKeys.TRAIN

  training_hooks = [
      tf.train.LoggingTensorHook({"loss": m.loss, "accuracy": m.acc[0]},
                                 every_n_iter=LOG_N_ITER)
  ]
  if args.tfdbg:
    training_hooks.append(tf_debug.LocalCLIDebugHook())

  return tf.estimator.EstimatorSpec(mode, loss=m.loss, train_op=m.train_op,
                                    training_hooks=training_hooks)
Example #10
0
def get_config(args):
    """Build a tensorpack TrainConfig from command-line args.

    Args:
        args: parsed arguments; uses .gpu (comma-separated ids or None),
            .batch_size, .multi_scale, .data_format, .debug.

    Returns:
        A TrainConfig driving the YOLO-style training loop.
    """
    # Split the global batch size evenly across the visible GPUs.
    if args.gpu is not None:  # FIX: was `!= None`
        nr_gpu = len(args.gpu.split(','))
        batch_size = int(args.batch_size) // nr_gpu
    else:
        batch_size = int(args.batch_size)

    ds_train = get_data('train', args.multi_scale, batch_size)
    ds_test = get_data('test', False, batch_size)

    callbacks = [
        ModelSaver(),
        # Learning-rate warm-up then decay, keyed by epoch number.
        ScheduledHyperParamSetter('learning_rate', [(0, 1e-4), (3, 2e-4),
                                                    (6, 3e-4), (10, 6e-4),
                                                    (15, 1e-3), (60, 1e-4),
                                                    (90, 1e-5)]),
        ScheduledHyperParamSetter('unseen_scale', [(0, cfg.unseen_scale),
                                                   (cfg.unseen_epochs, 0)]),
        HumanHyperParamSetter('learning_rate'),
    ]
    # FIX: was `== True`; truthiness is equivalent assuming cfg.mAP is a
    # bool/int flag — confirm it is never a string.
    if cfg.mAP:
        # Evaluate mAP on the test set every 3 epochs.
        callbacks.append(
            PeriodicTrigger(InferenceRunner(ds_test, [CalMAP(cfg.test_list)]),
                            every_k_epochs=3))
    if args.debug:
        callbacks.append(HookToCallback(tf_debug.LocalCLIDebugHook()))
    return TrainConfig(
        dataflow=ds_train,
        callbacks=callbacks,
        model=Model(args.data_format),
        max_epoch=cfg.max_epoch,
    )
Example #11
0
def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None,
                 use_validation_monitor=False, validation_monitor_kwargs=None,
                 use_early_stopping=False, early_stopping_kwargs=None):
  """Create train and eval hooks for Experiment.

  Args:
    use_tfdbg: attach a shared tfdbg CLI hook to both training and eval.
    use_dbgprofile: attach a ProfilerHook to training.
    dbgprofile_kwargs: overrides for the ProfilerHook defaults; must not be
      None when use_dbgprofile is True (it is passed to dict.update).
    use_validation_monitor: attach a ValidationMonitor to training.
    validation_monitor_kwargs: kwargs forwarded to ValidationMonitor; must
      not be None when use_validation_monitor is True.
    use_early_stopping: attach an EarlyStoppingHook to training and eval.
    early_stopping_kwargs: kwargs forwarded to EarlyStoppingHook; must not
      be None when use_early_stopping is True.

  Returns:
    A (train_monitors, eval_hooks) pair of lists.
  """
  train_monitors = []
  eval_hooks = []

  if use_tfdbg:
    # One shared hook instance serves both training and evaluation.
    hook = debug.LocalCLIDebugHook()
    train_monitors.append(hook)
    eval_hooks.append(hook)

  if use_dbgprofile:
    # Recorded traces can be visualized with chrome://tracing/
    # The memory/tensor lifetime is also profiled
    tf.logging.info("Using ProfilerHook")
    defaults = dict(save_steps=10, show_dataflow=True, show_memory=True)
    defaults.update(dbgprofile_kwargs)
    train_monitors.append(tf.contrib.hooks.ProfilerHook(**defaults))

  if use_validation_monitor:
    tf.logging.info("Using ValidationMonitor")
    # NOTE(review): the monitor keeps a reference to the eval_hooks LIST, so
    # the early-stopping hook appended below is visible to it as well —
    # presumably intentional; confirm before restructuring.
    train_monitors.append(
        tf.contrib.learn.monitors.ValidationMonitor(
            hooks=eval_hooks, **validation_monitor_kwargs))

  if use_early_stopping:
    tf.logging.info("Using EarlyStoppingHook")
    hook = metrics_hook.EarlyStoppingHook(**early_stopping_kwargs)
    # Adding to both training and eval so that eval aborts as well
    train_monitors.append(hook)
    eval_hooks.append(hook)

  return train_monitors, eval_hooks
def create_experiment_fn(output_dir=None):
    """Build an Experiment around a mini-batch KMeansClustering estimator."""
    if FLAGS.use_cosine_distance:
        distance_metric = tf.contrib.factorization.COSINE_DISTANCE
    else:
        distance_metric = tf.contrib.factorization.SQUARED_EUCLIDEAN_DISTANCE
    if FLAGS.use_kmeans_plus_plus:
        initial_clusters = tf.contrib.factorization.KMEANS_PLUS_PLUS_INIT
    else:
        initial_clusters = tf.contrib.factorization.RANDOM_INIT

    # The k-means estimator, checkpointing on a timed schedule.
    kmeans = kmeans_lib.KMeansClustering(
        FLAGS.num_clusters,
        model_dir=output_dir,
        initial_clusters=initial_clusters,
        distance_metric=distance_metric,
        use_mini_batch=True,
        relative_tolerance=FLAGS.relative_tolerance,
        config=tf.contrib.learn.RunConfig(
            save_checkpoints_secs=FLAGS.save_checkpoints_secs))

    train_monitors = [tf_debug.LocalCLIDebugHook()] if FLAGS.debug else []

    export_strategy = saved_model_export_utils.make_export_strategy(
        _predict_input_fn, exports_to_keep=5)
    return tf.contrib.learn.Experiment(
        estimator=kmeans,
        train_steps=FLAGS.num_train_steps,
        eval_steps=1,
        eval_input_fn=_input_fn,
        train_input_fn=_input_fn,
        train_monitors=train_monitors,
        export_strategies=[export_strategy])
Example #13
0
def create_hooks(use_tfdbg=False,
                 use_dbgprofile=False,
                 dbgprofile_kwargs=None,
                 use_validation_monitor=False,
                 validation_monitor_kwargs=None):
    """Create train and eval hooks for Experiment.

    Args:
        use_tfdbg: attach a shared tfdbg CLI hook to training and eval.
        use_dbgprofile: attach a ProfilerHook to training.
        dbgprofile_kwargs: overrides for the ProfilerHook defaults; must not
            be None when use_dbgprofile is True.
        use_validation_monitor: attach a ValidationMonitor to training.
        validation_monitor_kwargs: kwargs forwarded to ValidationMonitor;
            must not be None when use_validation_monitor is True.

    Returns:
        A (train_monitors, eval_hooks) pair of lists.
    """
    train_monitors = []
    eval_hooks = []

    if use_tfdbg:
        # One shared hook instance serves both training and evaluation.
        hook = debug.LocalCLIDebugHook()
        train_monitors.append(hook)
        eval_hooks.append(hook)

    if use_dbgprofile:
        # Recorded traces can be visualized with chrome://tracing/
        # The memory/tensor lifetime is also profiled
        defaults = dict(save_steps=10, show_dataflow=True, show_memory=True)
        defaults.update(dbgprofile_kwargs)
        train_monitors.append(tf.contrib.hooks.ProfilerHook(**defaults))

    if use_validation_monitor:
        # NOTE(review): the monitor holds a reference to the eval_hooks list
        # itself, so later mutations of that list are visible to it.
        train_monitors.append(
            tf.contrib.learn.monitors.ValidationMonitor(
                hooks=eval_hooks, **validation_monitor_kwargs))

    return train_monitors, eval_hooks
Example #14
0
def main(unused_argv):
    """Train the dual-encoder retrieval model with periodic validation."""
    hparams = udc_hparams.create_hparams()

    model_fn = udc_model.create_model_fn(hparams,
                                         model_impl=dual_encoder_model)

    estimator = tf.contrib.learn.Estimator(model_fn=model_fn,
                                           model_dir=MODEL_DIR)

    input_fn_train = udc_inputs.create_input_fn(
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        input_files=[TRAIN_FILE],
        batch_size=hparams.batch_size,
        num_epochs=FLAGS.num_epochs)

    # Validation input: one pass over the validation file per evaluation.
    input_fn_eval = udc_inputs.create_input_fn(
        mode=tf.contrib.learn.ModeKeys.EVAL,
        input_files=[VALIDATION_FILE],
        batch_size=hparams.eval_batch_size,
        num_epochs=1)

    eval_metrics = udc_metrics.create_evaluation_metrics()

    # Re-evaluate every FLAGS.eval_every training steps.
    eval_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_eval,
        every_n_steps=FLAGS.eval_every,
        metrics=eval_metrics)

    # FIX: removed a tfdbg.LocalCLIDebugHook that was constructed but never
    # passed to fit() (dead code). Add it to `monitors` below if interactive
    # debugging is wanted.
    estimator.fit(input_fn=input_fn_train,
                  steps=FLAGS.num_steps,
                  monitors=[eval_monitor])
Example #15
0
def create_experiment(output_dir, data_dir, model_name, train_steps,
                      eval_steps):
    """Create an Experiment from T2T estimator components."""
    hparams = create_hparams(FLAGS.hparams_set,
                             FLAGS.problems,
                             data_dir,
                             passed_hparams=FLAGS.hparams)
    # Only the chief worker records run metadata, and only for training runs.
    if FLAGS.worker_id == 0 and FLAGS.schedule in ["local_run", "train"]:
        save_metadata(output_dir, hparams)
    estimator, input_fns = create_experiment_components(hparams=hparams,
                                                        output_dir=output_dir,
                                                        data_dir=data_dir,
                                                        model_name=model_name)
    train_monitors = []
    eval_hooks = []
    if FLAGS.tfdbg:
        # The same tfdbg instance is attached to both phases.
        cli_hook = debug.LocalCLIDebugHook()
        train_monitors.append(cli_hook)
        eval_hooks.append(cli_hook)
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN],
        eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL],
        train_steps=train_steps,
        eval_steps=eval_steps,
        min_eval_frequency=FLAGS.local_eval_frequency,
        train_monitors=train_monitors,
        eval_hooks=eval_hooks)
Example #16
0
def experiment_fn(run_config, params):
    """Build a tf.contrib.learn Experiment for the vocab-based seq model."""
    model = Model()  # TODO: core model
    estimator = tf.estimator.Estimator(  # TODO: Estimator high-level API
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    # TODO: these steps could be done entirely with the tf.data high-level API
    train_data, test_data = data_loader.make_train_and_test_set()

    # TODO: hook creation below — worth studying
    train_input_fn, train_input_hook = data_loader.make_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.make_batch(
        test_data, batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        # Periodically decode sample inputs back to tokens for inspection.
        train_hooks.append(
            hook.print_variables(variables=['train/input_0'],
                                 rev_vocab=get_rev_vocab(vocab),
                                 every_n_iter=Config.train.check_hook_n_iter))

        train_hooks.append(
            hook.print_target(variables=['train/target_0', 'train/pred_0'],
                              every_n_iter=Config.train.check_hook_n_iter))

    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
    return experiment
Example #17
0
def train(n_epochs, config):
    """Train, evaluate, and export the DAN question-answering classifier.

    Args:
        n_epochs: number of epochs the training dataset iterator repeats.
        config: accepted but unused here — checkpointing is configured
            locally below; TODO confirm this parameter can be dropped.
    """
    train_tfrecord = "train.tfrecord"
    test_tfrecord = "test.tfrecord"

    # Materialize the TFRecord splits once; later runs reuse the files.
    if not os.path.exists(train_tfrecord):
        questions, answers, word2index = datas.load_dataset(
            dir=".", num_words=VOCABULARY_SIZE)
        size = len(questions)

        # Train / test / (unused) split fractions over the full dataset.
        fractions = [0.8, 0.2, 0.0]
        # fractions = [0.1, 0.02, 0.1]
        l1 = int(fractions[0] * size)
        l2 = int((fractions[0] + fractions[1]) * size)

        TFgenerator.write_tfrecord(train_tfrecord, questions[:l1],
                                   answers[:l1])
        TFgenerator.write_tfrecord(test_tfrecord, questions[l1:l2],
                                   answers[l1:l2])

    train_iter = TFgenerator.load_datasets(train_tfrecord, n_epochs, K)
    test_iter = TFgenerator.load_datasets(test_tfrecord, 1, K)

    checkpointing_config = tf.estimator.RunConfig(
        save_checkpoints_secs=20 * 60,  # Save checkpoints every 20 minutes.
        keep_checkpoint_max=10,  # Retain the 10 most recent checkpoints.
        save_checkpoints_steps=None,
        save_summary_steps=5)

    dan_classifier = tf.estimator.Estimator(model_fn=qa_model_fn,
                                            model_dir="/tmp/DAN_model",
                                            params={},
                                            config=checkpointing_config)

    tensors_to_log = {"predictions": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=100)
    from tensorflow.python import debug as tf_debug
    # BUG FIX: the raw `tensors_to_log` dict was being passed as a hook;
    # the LoggingTensorHook built from it is what train() expects.
    hooks = [logging_hook, tf_debug.LocalCLIDebugHook()]

    dan_classifier.train(input_fn=lambda: train_iter, hooks=hooks)

    # BUG FIX: Python-2 `print` statement replaced with the function form
    # (identical output for a single argument, and valid on Python 3).
    print(dan_classifier.evaluate(input_fn=lambda: test_iter, hooks=hooks))

    def serving_input_receiver_fn():
        # Serving-time input: parses serialized tf.Example protos.
        features = {
            'question': tf.VarLenFeature(tf.int64),
            'answer': tf.VarLenFeature(tf.int64),
            'q_len': tf.FixedLenFeature([], tf.int64),
            'a_len': tf.FixedLenFeature([], tf.int64)
        }
        serialized_tf_example = tf.placeholder(dtype=tf.string,
                                               name='input_example_tensor')
        receiver_tensors = {'examples': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, features)
        return tf.estimator.export.ServingInputReceiver(
            features, receiver_tensors)

    dan_classifier.export_savedmodel(".", serving_input_receiver_fn)
Example #18
0
def main(unused_argv):
    """Train DeepLab v3+ for FLAGS.train_epochs, in epochs_per_eval chunks."""
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if FLAGS.clean_model_dir:
        # Start from scratch: wipe previous checkpoints and summaries.
        shutil.rmtree(FLAGS.model_dir, ignore_errors=True)

    # Set up a RunConfig to only save checkpoints once per training cycle.
    run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_plus_model_fn,
        model_dir=FLAGS.model_dir,
        config=run_config,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': FLAGS.batch_size,
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': FLAGS.pre_trained_model,
            'batch_norm_decay': _BATCH_NORM_DECAY,
            'num_classes': _NUM_CLASSES,
            'tensorboard_images_max_outputs':
            FLAGS.tensorboard_images_max_outputs,
            'weight_decay': FLAGS.weight_decay,
            'learning_rate_policy': FLAGS.learning_rate_policy,
            'num_train': _NUM_IMAGES['train'],
            'initial_learning_rate': FLAGS.initial_learning_rate,
            'max_iter': FLAGS.max_iter,
            'end_learning_rate': FLAGS.end_learning_rate,
            'power': _POWER,
            'momentum': _MOMENTUM,
            'freeze_batch_norm': FLAGS.freeze_batch_norm,
            'initial_global_step': FLAGS.initial_global_step
        })

    for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
        # These names must match tensors registered in the model graph.
        tensors_to_log = {
            'learning_rate': 'learning_rate',
            'cross_entropy': 'cross_entropy',
            'train_px_accuracy': 'train_px_accuracy',
            'train_mean_iou': 'train_mean_iou',
        }

        logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                  every_n_iter=1)
        train_hooks = [logging_hook]
        eval_hooks = None  # NOTE(review): only set when FLAGS.debug; unused here

        if FLAGS.debug:
            debug_hook = tf_debug.LocalCLIDebugHook()
            train_hooks.append(debug_hook)
            eval_hooks = [debug_hook]

        tf.logging.info("Start training.")
        # First lambda arg `True` presumably selects the training split —
        # confirm against input_fn's signature.
        model.train(
            input_fn=lambda: input_fn(True, FLAGS.data_dir, FLAGS.batch_size,
                                      FLAGS.epochs_per_eval),
            hooks=train_hooks,
            # steps=1  # For debug
        )
Example #19
0
def main(unused_argv):
    """Run DeepLab v3 inference and save CRF-refined masks side by side.

    For each image in FLAGS.infer_data_list, renders the raw segmentation and
    a CRF-post-processed segmentation into FLAGS.output_dir/<name>_mask.png.
    """
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size':
            1,  # Batch size must be 1 because the images' size may differ
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [
        os.path.join(FLAGS.data_dir, filename) for filename in examples
    ]

    # predict() returns a lazy generator, consumed by the loop below.
    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for pred_dict, image_path in zip(predictions, image_files):
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_filename = image_basename + '_mask.png'
        path_to_output = os.path.join(output_dir, output_filename)
        img = Image.open(image_path)

        print("generating:", path_to_output)
        mask = pred_dict['decoded_labels']
        mask = Image.fromarray(mask)
        # CRF refinement over the probability map; 10 is presumably the
        # number of CRF inference iterations — confirm crf()'s signature.
        crf_result = crf(img, pred_dict['probabilities'], 10)
        # Argmax over axis 2 (class axis), then pad to 4-D for decode_labels.
        crf_argmax = np.expand_dims(np.expand_dims(np.argmax(crf_result,
                                                             axis=2),
                                                   axis=0),
                                    axis=3)
        # NOTE(review): the transpose swaps the first two axes of the decoded
        # image — presumably undoing an H/W swap introduced upstream; verify.
        crf_decode = np.squeeze(
            preprocessing.decode_labels(crf_argmax)).transpose((1, 0, 2))

        # Render model mask and CRF mask side by side and save the figure.
        cmap = plt.get_cmap('bwr')
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
        ax1.imshow(mask)
        ax1.set_title('Segmentation with Deeplab')
        ax2.imshow(crf_decode, cmap=cmap)
        ax2.set_title('Segmentation with CRF post-processing _ 1')
        f.savefig(path_to_output, bbox_inches='tight')
def main(unused_arg):
    """Train the model with validation, test, and train-set eval monitors."""
    hparams = hp.create_hparams()

    model_fn = model.create_model_fn(hparams)

    estimator = tf.contrib.learn.Estimator(
        model_fn=model_fn,
        config=tf.contrib.learn.RunConfig(
            save_checkpoints_steps=FLAGS.eval_every,
            save_summary_steps=10000,
            log_step_count_steps=10000,
            model_dir=MODEL_DIR))

    # NOTE(review): `input` here is a project module (input.create_input_fn)
    # that shadows the Python builtin.
    input_fn_train = input.create_input_fn(
        input_files=[TRAIN_FILE_PATH],
        batch_size=hparams.batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs)

    monitors_list = []

    # Validation-set evaluation every FLAGS.eval_every steps.
    input_fn_validation = input.create_input_fn([VALIDATION_FILE_PATH],
                                                tf.contrib.learn.ModeKeys.EVAL,
                                                hparams.eval_batch_size, 1)
    validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_validation,
        every_n_steps=FLAGS.eval_every,
        metrics=metrics.create_evaluation_metrics('validation'))
    monitors_list.append(validation_monitor)

    # Test-set evaluation on the same cadence.
    input_fn_test = input.create_input_fn([TEST_FILE_PATH],
                                          tf.contrib.learn.ModeKeys.EVAL,
                                          hparams.eval_batch_size, 1)
    test_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_test,
        every_n_steps=FLAGS.eval_every,
        metrics=metrics.create_evaluation_metrics('test'))
    monitors_list.append(test_monitor)

    if FLAGS.debug:
        debuger = tf_debug.LocalCLIDebugHook()
        monitors_list.append(debuger)

    # Training-set accuracy, evaluated in EVAL mode every
    # FLAGS.train_eval_every steps (a cheap overfitting indicator).
    input_fn_train_eval = input.create_input_fn([TRAIN_FILE_PATH],
                                                tf.contrib.learn.ModeKeys.EVAL,
                                                hparams.batch_size, 1)
    train_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_train_eval,
        every_n_steps=FLAGS.train_eval_every,
        metrics={
            'train_accuracy':
            metrics.create_metric_spec(tf.contrib.metrics.streaming_accuracy,
                                       'predictions', None)
        })
    monitors_list.append(train_monitor)

    estimator.fit(input_fn=input_fn_train, steps=None, monitors=monitors_list)

    # Record the hyper-parameters used for this run alongside the model.
    hp.write_hparams_to_file(hparams, MODEL_DIR)
Example #21
0
def add_debug_hooks(hooks):
    """Append a tfdbg debugging hook to *hooks* based on the debug flags.

    --debug_tb takes precedence over --debug_cli; at most one hook is added.
    """
    if FLAGS.debug_tb:
        # Stream debug data to a TensorBoard debugger plugin instance.
        hooks.append(tf_debug.TensorBoardDebugHook("pawel-workstation:8080"))
    elif FLAGS.debug_cli:
        # Interactive curses CLI debugger with a NaN/Inf tensor filter.
        cli_hook = tf_debug.LocalCLIDebugHook()
        cli_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        hooks.append(cli_hook)
Example #22
0
 def train_one_epoch(self, nn, steps=None):  #pylint: disable=invalid-name
     """Run one training epoch on estimator *nn*.

     Attaches a tfdbg CLI hook when the solver's run_config has
     ``debug`` enabled; otherwise trains with no hooks.
     """
     run_conf = self.config['solver']['run_config']
     debug_hooks = [tf_debug.LocalCLIDebugHook()] if run_conf['debug'] else None
     nn.train(input_fn=self.input_fn(utils.TRAIN),
              steps=steps,
              hooks=debug_hooks)
Example #23
0
 def __init__(self, *args, **kwargs):
     """
     Args:
         args, kwargs: arguments to create `tfdbg.LocalCLIDebugHook`.
             Refer to tensorflow documentation for details.
     """
     from tensorflow.python import debug as tfdbg
     # BUG FIX: args/kwargs were previously dropped — LocalCLIDebugHook()
     # was constructed with no arguments, so caller-supplied options such
     # as ui_type/dump_root silently had no effect. Forward them as the
     # docstring promises.
     super(TFLocalCLIDebugHook, self).__init__(
         tfdbg.LocalCLIDebugHook(*args, **kwargs))
Example #24
0
def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams,
                      run_config):
    """Build a tf.contrib.learn.Experiment for *model_name*.

    Assembles the estimator plus optional tfdbg, profiler, and validation
    monitors (driven by FLAGS), and an export strategy when requested.
    """
    estimator, input_fns = create_experiment_components(
        data_dir=data_dir,
        model_name=model_name,
        hparams=hparams,
        run_config=run_config)  # input_fns: input_function

    monitors = []
    evaluation_hooks = []
    if FLAGS.tfdbg:
        # One shared CLI debug hook for both training and evaluation.
        cli_hook = debug.LocalCLIDebugHook()
        monitors.append(cli_hook)
        evaluation_hooks.append(cli_hook)
    if FLAGS.dbgprofile:
        # Recorded traces can be visualized with chrome://tracing/
        # The memory/tensor lifetime is also profiled
        profiler = tf.contrib.hooks.ProfilerHook(
            save_steps=10,
            output_dir=run_config.model_dir,
            show_dataflow=True,
            show_memory=True,
        )
        monitors.append(profiler)
    if FLAGS.schedule == "train_and_evaluate" and FLAGS.local_eval_frequency:
        monitors.append(
            tf.contrib.learn.monitors.ValidationMonitor(
                input_fn=input_fns[tf.estimator.ModeKeys.EVAL],
                eval_steps=eval_steps,
                every_n_steps=FLAGS.local_eval_frequency,
                hooks=evaluation_hooks,
                early_stopping_rounds=FLAGS.eval_early_stopping_steps,
                early_stopping_metric=FLAGS.eval_early_stopping_metric,
                early_stopping_metric_minimize=FLAGS.
                eval_early_stopping_metric_minimize))

    extra_kwargs = {}
    if FLAGS.export_saved_model:
        # Export only supports a single problem instance.
        assert len(hparams.problem_instances) == 1
        extra_kwargs["export_strategies"] = [
            make_export_strategy(hparams.problem_instances[0], hparams)
        ]

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN],
        eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL],
        train_steps=train_steps,
        eval_steps=eval_steps,
        train_monitors=monitors,
        eval_hooks=evaluation_hooks,
        min_eval_frequency=FLAGS.local_eval_frequency,
        train_steps_per_iteration=FLAGS.local_eval_frequency,
        eval_delay_secs=0,
        **extra_kwargs)
Example #25
0
def main(unused_argv):
    """Run tiled DeepLab v3 inference over the images in FLAGS.infer_data_list.

    Each image is split into 500x500 crops, each crop is segmented
    independently, and the per-crop masks are pasted back into one
    full-size RGBA mask saved as <image path without extension>_mask.png.
    """
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # Optionally attach the tfdbg CLI debugger to every predict() call.
    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size':
            1,  # Batch size must be 1 because the images' size may differ
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [
        os.path.join(FLAGS.data_dir, filename) + '.jpg'
        for filename in examples
    ]

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    CROP_HEIGHT = 500
    CROP_WIDTH = 500

    for img in image_files:
        width, height = Image.open(img).size
        full_mask = Image.new('RGBA', (width, height))
        print('SIZE', width, height)
        for i in range(0, int(math.ceil(width / CROP_WIDTH))):
            for j in range(0, int(math.ceil(height / CROP_HEIGHT))):
                # CROP = [top, left, crop_height, crop_width]; the last
                # row/column of tiles shrinks to the image remainder.
                CROP = [
                    j * CROP_HEIGHT, i * CROP_WIDTH, CROP_HEIGHT if
                    (j + 1) * CROP_HEIGHT <= height else height % CROP_HEIGHT,
                    CROP_WIDTH if
                    (i + 1) * CROP_WIDTH <= width else width % CROP_WIDTH
                ]
                print(CROP)
                predictions = model.predict(
                    input_fn=lambda: preprocessing.infer_input_fn([img], CROP),
                    hooks=pred_hooks)

                mask = next(predictions)['decoded_labels']
                mask = Image.fromarray(mask)
                # paste() takes an (x, y) box: x = left, y = top.
                full_mask.paste(mask, (CROP[1], CROP[0]))
        print('saving')
        # BUG FIX: img.split('.')[0] truncated any path containing a dot
        # before the extension (e.g. './data/x.jpg' -> ''); splitext only
        # strips the final extension.
        image_basename = os.path.splitext(img)[0]
        full_mask.save(image_basename + '_mask.png')
Example #26
0
def main(_):
    # Generate some fake Iris data.
    # It is okay for this example because this example is about how to use the
    # debugger, not how to use machine learning to solve the Iris classification
    # problem.
    def training_input_fn():
        features = {"features": tf.random_normal([128, 4])}
        labels = tf.random_uniform([128], minval=0, maxval=3, dtype=tf.int32)
        return features, labels

    def test_input_fn():
        features = {"features": tf.random_normal([32, 4])}
        labels = tf.random_uniform([32], minval=0, maxval=3, dtype=tf.int32)
        return features, labels

    feature_columns = [
        tf.feature_column.numeric_column("features", shape=(4, ))
    ]

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    model_dir = FLAGS.model_dir or tempfile.mkdtemp(
        prefix="debug_tflearn_iris_")

    classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],
                                            n_classes=3,
                                            model_dir=model_dir)

    if FLAGS.debug and FLAGS.tensorboard_debug_address:
        raise ValueError(
            "The --debug and --tensorboard_debug_address flags are mutually "
            "exclusive.")
    hooks = []
    if FLAGS.debug:
        # Optionally persist tfdbg's CLI configuration to a throwaway file.
        config_file_path = (tempfile.mktemp(".tfdbg_config")
                            if FLAGS.use_random_config_path else None)
        hooks.append(
            tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type,
                                       dump_root=FLAGS.dump_root,
                                       config_file_path=config_file_path))
    elif FLAGS.tensorboard_debug_address:
        hooks.append(
            tf_debug.TensorBoardDebugHook(FLAGS.tensorboard_debug_address))

    # Train model, using tfdbg hook.
    classifier.train(training_input_fn, steps=FLAGS.train_steps, hooks=hooks)

    # Evaluate accuracy, using tfdbg hook.
    eval_metrics = classifier.evaluate(test_input_fn,
                                       steps=FLAGS.eval_steps,
                                       hooks=hooks)
    accuracy_score = eval_metrics["accuracy"]

    print("After training %d steps, Accuracy = %f" %
          (FLAGS.train_steps, accuracy_score))

    # Make predictions, using tfdbg hook.
    predict_results = classifier.predict(test_input_fn, hooks=hooks)
    print("A prediction result: %s" % next(predict_results))
Example #27
0
def experiment_fn(run_config, params):
    """Build a tf.contrib.learn.Experiment around the Conversation model."""
    # Define the estimator first.
    conversation = Conversation()
    estimator = tf.estimator.Estimator(model_fn=conversation.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    # Load the token dictionary and record its size on the shared config.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    # Prepare the training and test datasets.
    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()

    train_input_fn, train_input_hook = data_loader.make_batch(
        (train_X, train_y), batch_size=Config.model.batch_size)
    test_input_fn, test_input_hook = data_loader.make_batch(
        (test_X, test_y), batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        # Periodically print encoder / decoder / prediction token strings.
        train_hooks.append(
            hook.print_variables(
                variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
                rev_vocab=utils.get_rev_vocab(vocab),
                every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    # Assemble and return the experiment.
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=0)
Example #28
0
 def train_model():
     """Train the enclosing-scope `classifier` for 1000 steps under tfdbg."""
     from tensorflow.python import debug as tf_debug
     cli_hook = tf_debug.LocalCLIDebugHook()
     input_fn = lambda: fe.train_input_fn(FLAGS.train_data, FLAGS.batch_size)
     classifier.train(input_fn=input_fn,
                      steps=1000,
                      hooks=[cli_hook])
Example #29
0
def main(unused_argv):
  """Repeatedly run DeepLab v3+ inference over the listed images,
  saving each predicted mask as a figure in FLAGS.output_dir."""
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Optionally attach the tfdbg CLI debugger to predict().
  pred_hooks = [tf_debug.LocalCLIDebugHook()] if FLAGS.debug else None

  model = tf.estimator.Estimator(
      model_fn=deeplab_model.deeplabv3_plus_model_fn,
      model_dir=FLAGS.model_dir,
      params={
          'output_stride': FLAGS.output_stride,
          'batch_size': 1,  # Batch size must be 1 because the images' size may differ
          'base_architecture': FLAGS.base_architecture,
          'pre_trained_model': None,
          'batch_norm_decay': None,
          'num_classes': _NUM_CLASSES,
      })
  print("We are after estimator" + str(time.time()-startTime))

  examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
  image_files = [os.path.join(FLAGS.data_dir, filename +".jpg") for filename in examples]
  # Loop forever, re-running inference over the same image list each pass.
  while True:
    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    print("We are after prediction"+ str(time.time()-startTime))

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
      os.makedirs(output_dir)

    for pred_dict, image_path in zip(predictions, image_files):
      print(str(time.time() - startTime))
      stem = os.path.splitext(os.path.basename(image_path))[0]
      path_to_output = os.path.join(output_dir, stem + '_mask.png')

      print("generating:", path_to_output)
      mask_img = Image.fromarray(pred_dict['decoded_labels'])
      plt.axis('off')
      plt.imshow(mask_img)
      plt.savefig(path_to_output, bbox_inches='tight')
def main(unused_argv):
    """Segment every file in the configured image folder and save masks.

    Reads images from cfg.values['img_folder'], runs DeepLab v3 prediction
    on each, relabels every non-background pixel to cfg.values['bear_label'],
    and writes one 8-bit grayscale PNG per image into FLAGS.output_dir.

    NOTE(review): this is the second ``def main`` in this file and shadows
    the one defined above it.
    """
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # Optionally attach the tfdbg CLI debugger to predict().
    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    # Hard-coded checkpoint directory (overrides FLAGS.model_dir).
    model_dir = './models/model'  # FLAGS.model_dir

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size':
            1,  # Batch size must be 1 because the images' size may differ
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    #     examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    #     image_files = [os.path.join(FLAGS.data_dir, filename) for filename in examples]

    # Collect every entry in the configured folder (no extension filtering).
    img_folder = cfg.values['img_folder']
    image_files = [os.path.join(img_folder, f) for f in os.listdir(img_folder)]
    # image_files = random.sample(image_files, 1)

    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # `predictions` is a generator; zip pairs each result with its path
    # (assumes results are yielded in input order — TODO confirm).
    for pred_dict, image_path in zip(predictions, image_files):
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        # print(pred_dict['classes'].shape)

        output_filename = image_basename + '.png'
        path_to_output = os.path.join(output_dir, output_filename)

        print("generating:", path_to_output)
        # mask = merge_colors(pred_dict['decoded_labels'])

        # Drop the trailing channel axis, then collapse all foreground
        # class ids to the single configured label value.
        label_image = np.squeeze(pred_dict['classes'], axis=2)
        label_image[label_image > 0] = cfg.values['bear_label']
        # print([np.max(row) for row in label_image])
        mask = Image.fromarray(label_image, mode='L')
        mask.save(path_to_output)