  def _GetMemoryOptimizerConfig(self):
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
    return config_pb2.ConfigProto(graph_options=graph_options)
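A minimal usage sketch for the helper above (assuming the TF 1.x proto imports shown): the resulting ConfigProto is passed to a session so Grappler applies the heuristic memory optimizer before execution.

import tensorflow as tf
from tensorflow.core.protobuf import config_pb2, rewriter_config_pb2

rewrite_options = rewriter_config_pb2.RewriterConfig(
    memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
config = config_pb2.ConfigProto(
    graph_options=config_pb2.GraphOptions(rewrite_options=rewrite_options))
with tf.Session(config=config) as sess:
    # Any graph executed in this session is rewritten by Grappler's
    # heuristic memory optimizer before it runs.
    pass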
Example #2
  def __init__(self, iterations, eval_steps):
    tf.logging.info("LowLevelRunner: constructor.")

    self.fake_feature_structure = {}
    self.feature_structure = {}
    self.fake_eval_feature_structure = {}
    self.eval_feature_structure = {}
    self.infeed_queue = []
    self.eval_infeed_queue = []
    self.fake_enqueue_ops = []
    self.enqueue_ops = []
    self.fake_eval_enqueue_ops = []
    self.eval_enqueue_ops = []
    self.fake_dataset_initializer = []
    self.dataset_initializer = []
    self.fake_eval_dataset_initializer = []
    self.eval_dataset_initializer = []
    self.outfeed_tensors = []
    self.outfeed_names = []
    self.dequeue_ops = []
    self.train_compile_op = None
    self.eval_compile_op = None
    self.loss = None
    self.eval_op = None
    self.predictions = {}
    self.iterations = iterations
    self.eval_steps = eval_steps
    self.num_hosts = FLAGS.tpu_num_shards // FLAGS.tpu_num_shards_per_host
    self.scaffold_fn = None
    self.tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.master or FLAGS.cloud_tpu_name)
    # Disable grappler for better performance.
    self.session_config = tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=tf.GraphOptions(
            rewrite_options=rewriter_config_pb2.RewriterConfig(
                disable_meta_optimizer=True)),
        isolate_session_state=True,
        operation_timeout_in_ms=600 * 60 * 1000)  # 10 hours
    cluster_spec = self.tpu_cluster_resolver.cluster_spec()
    if cluster_spec:
      self.session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
    self.input_graph = tf.Graph()
    self.eval_input_graph = tf.Graph()
    # Train and eval share the same session and graph so that the weights
    # can be shared for in memory eval.
    self.graph = tf.Graph()
    self.output_graph = tf.Graph()
    with self.graph.as_default():
      if FLAGS.random_seed:
        tf.random.set_random_seed(FLAGS.random_seed)
      self.num_epochs_tensor = tf.placeholder(
          tf.int32, shape=(), name="epochs")
      self.train_steps_tensor = tf.placeholder(
          tf.int32, shape=(), name="steps_per_train_loop")
      self.eval_steps_tensor = tf.placeholder(
          tf.int32, shape=(), name="steps_per_eval_loop")
      self.tpu_init = [tpu.initialize_system()]
      self.tpu_shutdown = tpu.shutdown_system()
    self.master = self.tpu_cluster_resolver.get_master()
    self.input_sess = tf.Session(
        self.master,
        graph=self.input_graph,
        config=self.session_config)
    self.eval_input_sess = tf.Session(
        self.master,
        graph=self.eval_input_graph,
        config=self.session_config)
    self.sess = tf.Session(
        self.master,
        graph=self.graph,
        config=self.session_config)
    self.output_sess = tf.Session(
        self.master,
        graph=self.output_graph,
        config=self.session_config)
    self.sess.run(self.tpu_init)
    self.infeed_thread = None
    self.train_eval_thread = None
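A hedged sketch of the matching teardown for the constructor above (the method name and cleanup order are assumptions, not the original code): the shutdown op built in __init__ is run on the shared session before the sessions are closed.

  def shutdown(self):
    """Shuts down the TPU system and closes all sessions (assumed teardown)."""
    # tpu_shutdown was built in self.graph, which self.sess is bound to.
    self.sess.run(self.tpu_shutdown)
    for sess in (self.input_sess, self.eval_input_sess,
                 self.output_sess, self.sess):
      sess.close()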
Example #3
  def get_tensorrt_rewriter_config(
      cls,
      rewriter_config_template=None,
      max_batch_size=1,
      max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
      precision_mode=TrtPrecisionMode.FP32,
      minimum_segment_size=3,
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=None,
      use_calibration=True,
      use_function_backup=True):
    """Returns a RewriterConfig proto for TRT transformation.

    Args:
      rewriter_config_template: a template RewriterConfig proto used to create a
        TRT-enabled RewriterConfig. If None, it will use a default one.
      max_batch_size: max size for the input batch
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      cached_engine_batches: a list of batch sizes used to create cached
        engines, only used when is_dynamic_op is True. The length of the list
        should be <= maximum_cached_engines, and the dynamic TRT op will use
        this list to determine the batch sizes of the cached engines, instead of
        making the decision on the fly. This is useful when we know the most
        common batch size(s) the application is going to generate.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph by running calibration with calibrate(). If set to False,
        quantization nodes will be expected for every tensor in the graph
        (excluding those which will be fused). If a range is missing, an error
        will occur. Please note that accuracy may be negatively affected if
        there is a mismatch between which tensors TRT quantizes and which
        tensors were trained with fake quantization.
      use_function_backup: if set to True, it will create a FunctionDef for each
        subgraph that is converted to TRT op, and if TRT ops fail to execute at
        runtime, it'll invoke that function as a fallback.

    Returns:
      A RewriterConfig proto which sets a TensorRTOptimizer to run Grappler.

    Raises:
      TypeError: if any of the parameters are of unexpected type.
      ValueError: if any of the parameters are of unexpected value.
    """
    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,unused-import,line-too-long,unused-variable
    # Import a random symbol to trigger loading of TRT library.
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
    # pylint: enable=g-import-not-at-top,unused-import,line-too-long,unused-variable

    if rewriter_config_template is not None and not isinstance(
        rewriter_config_template, rewriter_config_pb2.RewriterConfig):
      raise TypeError(
          "rewriter_config_template should be a RewriterConfig proto.")

    rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
    if rewriter_config_template is None:
      # Layout optimizer may add Const nodes followed by Reshape nodes, thus we
      # need to run constant folding again.
      rewriter_config_with_trt.optimizers.extend(
          ["constfold", "layout", "constfold"])
      rewriter_config_with_trt.meta_optimizer_iterations = (
          rewriter_config_pb2.RewriterConfig.ONE)
    else:
      rewriter_config_with_trt.CopyFrom(rewriter_config_template)

    optimizer = rewriter_config_with_trt.custom_optimizers.add()
    optimizer.name = "TensorRTOptimizer"
    optimizer.parameter_map["minimum_segment_size"].i = minimum_segment_size
    optimizer.parameter_map["max_batch_size"].i = max_batch_size
    optimizer.parameter_map["is_dynamic_op"].b = is_dynamic_op
    optimizer.parameter_map[
        "max_workspace_size_bytes"].i = max_workspace_size_bytes
    optimizer.parameter_map["precision_mode"].s = _to_bytes(precision_mode)
    optimizer.parameter_map["maximum_cached_engines"].i = maximum_cached_engines
    if cached_engine_batches:
      optimizer.parameter_map["cached_engine_batches"].list.i.extend(
          cached_engine_batches)
    optimizer.parameter_map["use_calibration"].b = use_calibration
    optimizer.parameter_map["use_function_backup"].b = use_function_backup
    return rewriter_config_with_trt
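A hedged usage sketch for the classmethod above: `TrtGraphConverter` stands in for whatever class this method belongs to, and the batch/precision values are illustrative only. The returned RewriterConfig is wrapped in a ConfigProto so Grappler runs the TensorRTOptimizer pass.

from tensorflow.core.protobuf import config_pb2

# `TrtGraphConverter` is an assumed owner class for the classmethod above.
trt_rewriter_config = TrtGraphConverter.get_tensorrt_rewriter_config(
    max_batch_size=8,
    precision_mode=TrtPrecisionMode.FP16,
    is_dynamic_op=True,
    maximum_cached_engines=3)
session_config = config_pb2.ConfigProto(
    graph_options=config_pb2.GraphOptions(
        rewrite_options=trt_rewriter_config))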
Example #4
def freeze_graph_with_def_protos(input_graph_def,
                                 input_saver_def,
                                 input_checkpoint,
                                 output_node_names,
                                 restore_op_name,
                                 filename_tensor_name,
                                 clear_devices,
                                 initializer_nodes,
                                 optimize_graph=True,
                                 variable_names_blacklist=''):
    """Converts all variables in a graph and checkpoint into constants."""
    del restore_op_name, filename_tensor_name  # Unused by updated loading code.

    # 'input_checkpoint' may be a prefix if we're using Saver V2 format
    if not saver_lib.checkpoint_exists(input_checkpoint):
        raise ValueError('Input checkpoint "' + input_checkpoint +
                         '" does not exist!')

    if not output_node_names:
        raise ValueError(
            'You must supply the name of a node to --output_node_names.')

    # Remove all the explicit device specifications for this node. This helps to
    # make the graph more portable.
    if clear_devices:
        for node in input_graph_def.node:
            node.device = ''

    with tf.Graph().as_default():
        tf.import_graph_def(input_graph_def, name='')

        if optimize_graph:
            logging.info('Graph Rewriter optimizations enabled')
            rewrite_options = rewriter_config_pb2.RewriterConfig()
            rewrite_options.optimizers.append('pruning')
            rewrite_options.optimizers.append('constfold')
            rewrite_options.optimizers.append('layout')
            graph_options = tf.GraphOptions(rewrite_options=rewrite_options,
                                            infer_shapes=True)
        else:
            logging.info('Graph Rewriter optimizations disabled')
            graph_options = tf.GraphOptions()
        config = tf.ConfigProto(graph_options=graph_options)
        with session.Session(config=config) as sess:
            if input_saver_def:
                saver = saver_lib.Saver(saver_def=input_saver_def)
                saver.restore(sess, input_checkpoint)
            else:
                var_list = {}
                reader = pywrap_tensorflow.NewCheckpointReader(
                    input_checkpoint)
                var_to_shape_map = reader.get_variable_to_shape_map()
                for key in var_to_shape_map:
                    try:
                        tensor = sess.graph.get_tensor_by_name(key + ':0')
                    except KeyError:
                        # This tensor doesn't exist in the graph (for example it's
                        # 'global_step' or a similar housekeeping element) so skip it.
                        continue
                    var_list[key] = tensor
                saver = saver_lib.Saver(var_list=var_list)
                saver.restore(sess, input_checkpoint)
                if initializer_nodes:
                    sess.run(initializer_nodes)

            variable_names_blacklist = (variable_names_blacklist.split(',')
                                        if variable_names_blacklist else None)
            output_graph_def = graph_util.convert_variables_to_constants(
                sess,
                input_graph_def,
                output_node_names.split(','),
                variable_names_blacklist=variable_names_blacklist)

    return output_graph_def
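A hedged usage sketch for the function above; the checkpoint prefix, output node name, and output path are placeholders, and the frozen GraphDef is written to disk with tf.train.write_graph.

frozen_graph_def = freeze_graph_with_def_protos(
    input_graph_def=tf.get_default_graph().as_graph_def(),
    input_saver_def=None,
    input_checkpoint='/tmp/model/model.ckpt-10000',  # placeholder prefix
    output_node_names='softmax',                     # placeholder node name
    restore_op_name='',       # unused by the updated loading code
    filename_tensor_name='',  # unused by the updated loading code
    clear_devices=True,
    initializer_nodes='',
    optimize_graph=True)
tf.train.write_graph(frozen_graph_def, '/tmp/model', 'frozen_graph.pb',
                     as_text=False)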
Example #5
def get_tensorrt_rewriter_config(rewriter_config=None,
                                 max_batch_size=1,
                                 max_workspace_size_bytes=2 << 20,
                                 precision_mode=TrtPrecisionMode.FP32,
                                 minimum_segment_size=3,
                                 is_dynamic_op=False,
                                 maximum_cached_engines=1,
                                 cached_engine_batches=None,
                                 use_calibration=True):
    """Returns a RewriterConfig proto for TRT transformation.

  Args:
    rewriter_config: a template RewriterConfig proto used to create a
      TRT-enabled RewriterConfig. If None, it will use a default one.
    max_batch_size: max size for the input batch
    max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
      engine can use at execution time. This corresponds to the 'workspaceSize'
      parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
    precision_mode: one of TrtPrecisionMode.supported_precision_modes().
    minimum_segment_size: the minimum number of nodes required for a subgraph to
      be replaced by TRTEngineOp.
    is_dynamic_op: whether to generate dynamic TRT ops which will build the TRT
      network and engine at run time.
    maximum_cached_engines: max number of cached TRT engines in dynamic TRT ops.
      If the number of cached engines is already at max but none of them can
      serve the input, the TRTEngineOp will fall back to run the TF function
      based on which the TRTEngineOp is created.
    cached_engine_batches: a list of batch sizes used to create cached
      engines, only used when is_dynamic_op is True. The length of the list
      should be <= maximum_cached_engines, and the dynamic TRT op will
      use this list to determine the batch sizes of the cached engines, instead
      of making the decision on the fly. This is useful when we know the most
      common batch size(s) the application is going to generate.
    use_calibration: this argument is ignored if precision_mode is not INT8. If
      set to True, a calibration graph will be created to calibrate the missing
      ranges. The calibration graph must be converted to an inference graph
      using calib_graph_to_infer_graph() after running calibration. if set to
      False, quantization nodes will be expected for every tensor in the graph
      (excluding those which will be fused). If a range is missing, an error
      will occur. Please note that accuracy may be negatively affected if there
      is a mismatch between which tensors TRT quantizes and which tensors were
      trained with fake quantization.

  Returns:
    A RewriterConfig proto which sets a TensorRTOptimizer to run Grappler.

  Raises:
    TypeError: if any of the parameters are of unexpected type.
    ValueError: if any of the parameters are of unexpected value.
  """
    if rewriter_config is not None and not isinstance(
            rewriter_config, rewriter_config_pb2.RewriterConfig):
        raise TypeError("rewriter_config should be a RewriterConfig proto.")

    rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
    if rewriter_config is None:
        # Layout optimizer may add Const nodes followed by Reshape nodes, thus we
        # need to run constant folding again.
        rewriter_config_with_trt.optimizers.extend(
            ["constfold", "layout", "constfold"])
        rewriter_config_with_trt.meta_optimizer_iterations = (
            rewriter_config_pb2.RewriterConfig.ONE)
    else:
        rewriter_config_with_trt.CopyFrom(rewriter_config)

    if precision_mode.upper(
    ) not in TrtPrecisionMode.supported_precision_modes():
        raise ValueError(("precision mode '{}' is not supported."
                          "It should be one of {}").format(
                              precision_mode,
                              TrtPrecisionMode.supported_precision_modes))

    optimizer = rewriter_config_with_trt.custom_optimizers.add()
    optimizer.name = "TensorRTOptimizer"
    optimizer.parameter_map["minimum_segment_size"].i = minimum_segment_size
    optimizer.parameter_map["max_batch_size"].i = max_batch_size
    optimizer.parameter_map["is_dynamic_op"].b = is_dynamic_op
    optimizer.parameter_map[
        "max_workspace_size_bytes"].i = max_workspace_size_bytes
    optimizer.parameter_map["precision_mode"].s = _to_bytes(precision_mode)
    optimizer.parameter_map[
        "maximum_cached_engines"].i = maximum_cached_engines
    if cached_engine_batches:
        if not isinstance(cached_engine_batches, list):
            raise TypeError("cached_engine_batches should be a list.")
        if len(cached_engine_batches) > maximum_cached_engines:
            raise ValueError(
                "cached_engine_batches should not contain more than "
                "maximum_cached_engines items.")
        optimizer.parameter_map["cached_engine_batches"].list.i.extend(
            cached_engine_batches)
    optimizer.parameter_map["use_calibration"].b = use_calibration
    return rewriter_config_with_trt
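A short hedged sketch of the validation behaviour documented above (assuming the function and TrtPrecisionMode are importable from the same module): an unsupported precision mode raises ValueError, as does passing more cached batch sizes than maximum_cached_engines.

try:
    get_tensorrt_rewriter_config(precision_mode='FP64')
except ValueError as e:
    print(e)  # 'FP64' is not in TrtPrecisionMode.supported_precision_modes()

try:
    get_tensorrt_rewriter_config(is_dynamic_op=True,
                                 maximum_cached_engines=1,
                                 cached_engine_batches=[1, 8])
except ValueError as e:
    print(e)  # len(cached_engine_batches) > maximum_cached_engines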
Example #6
def main(unused_argv):
    steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu if (FLAGS.tpu or FLAGS.use_tpu) else '',
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)

    if FLAGS.use_train_runner:
        trunner = train_runner.TrainRunner(
            iterations=FLAGS.iterations_per_loop,
            train_steps=FLAGS.train_steps)

    if FLAGS.mode != 'eval':
        mlperf_log.resnet_print(key=mlperf_log.RUN_START)

    if FLAGS.use_async_checkpointing or FLAGS.mode == 'in_memory_eval':
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)
    mlperf_log.resnet_print(key=mlperf_log.INPUT_BATCH_SIZE,
                            value=FLAGS.train_batch_size)
    mlperf_log.resnet_print(key=mlperf_log.RUN_SET_RANDOM_SEED, value='none')
    if not FLAGS.use_train_runner:
        config = tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            model_dir=FLAGS.model_dir,
            save_checkpoints_steps=save_checkpoints_steps,
            log_step_count_steps=FLAGS.log_step_count_steps,
            save_summary_steps=0,
            session_config=tf.ConfigProto(
                graph_options=tf.GraphOptions(
                    rewrite_options=rewriter_config_pb2.RewriterConfig(
                        disable_meta_optimizer=True))),
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                # num_shards=FLAGS.num_cores,
                per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
                .PER_HOST_V2))  # pylint: disable=line-too-long

        resnet_classifier = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=resnet_model_fn,
            config=config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            export_to_tpu=False)
    assert FLAGS.precision == 'bfloat16' or FLAGS.precision == 'float32', (
        'Invalid value for --precision flag; must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', FLAGS.precision)
    use_bfloat16 = FLAGS.precision == 'bfloat16'

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=use_bfloat16,
                transpose_input=FLAGS.transpose_input,
                selection=selection)
            for (is_training,
                 selection) in [(True, select_train), (False, select_eval)]
        ]
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=FLAGS.transpose_input,
                cache=FLAGS.use_cache and is_training,
                num_parallel_calls=FLAGS.num_parallel_calls,
                num_cores=FLAGS.num_prefetch_threads,
                prefetch_depth_auto_tune=FLAGS.prefetch_depth_auto_tune,
                use_bfloat16=use_bfloat16) for is_training in [True, False]
        ]

    if FLAGS.use_train_runner and FLAGS.mode == 'train':
        params = {'batch_size': FLAGS.train_batch_size}
        trunner.initialize(imagenet_train.input_fn, resnet_model_fn, params)

    mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_TRAIN_EXAMPLES,
                            value=FLAGS.num_train_images)
    mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_EVAL_EXAMPLES,
                            value=FLAGS.num_eval_images)

    steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size
    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

    if FLAGS.mode == 'eval':
        params = {'batch_size': FLAGS.eval_batch_size}
        if FLAGS.use_eval_runner:
            erunner = eval_runner.EvalRunner(input_fn=imagenet_eval.input_fn,
                                             model_fn=resnet_model_fn,
                                             params=params,
                                             num_steps=eval_steps)
        success = False
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            current_step = int(os.path.basename(ckpt).split('-')[1])
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                mlperf_log.resnet_print(key=mlperf_log.EVAL_START)

                if FLAGS.use_eval_runner:
                    eval_results = erunner.eval(num_steps=eval_steps,
                                                checkpoint_path=ckpt)
                else:
                    eval_results = resnet_classifier.evaluate(
                        input_fn=imagenet_eval.input_fn,
                        steps=eval_steps,
                        checkpoint_path=ckpt)

                mlperf_log.resnet_print(key=mlperf_log.EVAL_SIZE,
                                        value=FLAGS.num_eval_images)
                mlperf_log.resnet_print(key=mlperf_log.EVAL_STOP)
                mlperf_log.resnet_print(
                    key=mlperf_log.EVAL_ACCURACY,
                    value={
                        'epoch': max(current_step // steps_per_epoch - 1, 0),
                        'value': float(eval_results['top_1_accuracy'])
                    })
                mlperf_log.resnet_print(key=mlperf_log.EVAL_TARGET,
                                        value=FLAGS.stop_threshold)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)
                if eval_results['top_1_accuracy'] >= FLAGS.stop_threshold:
                    success = True
                    mlperf_log.resnet_print(key=mlperf_log.RUN_STOP,
                                            value={'success': 'true'})
                    break

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

        if not success:
            mlperf_log.resnet_print(key=mlperf_log.RUN_STOP,
                                    value={'success': 'false'})

    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        mlperf_log.resnet_print(key=mlperf_log.TRAIN_LOOP)
        mlperf_log.resnet_print(key=mlperf_log.TRAIN_EPOCH, value=0)

        if FLAGS.mode == 'train':
            if FLAGS.use_train_runner:
                trunner.train()
            else:
                hooks = []
                if FLAGS.use_async_checkpointing:
                    hooks.append(
                        async_checkpoint.AsyncCheckpointSaverHook(
                            checkpoint_dir=FLAGS.model_dir,
                            save_steps=max(100, FLAGS.iterations_per_loop)))
                resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                        max_steps=FLAGS.train_steps,
                                        hooks=hooks)
        elif FLAGS.mode == 'in_memory_eval':
            steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size
            hooks = []
            mlperf_log.resnet_print(key=mlperf_log.EVAL_TARGET,
                                    value=FLAGS.stop_threshold)
            hooks.append(
                in_memory_eval.TPUInMemoryEvalHook(
                    resnet_classifier,
                    imagenet_eval.input_fn,
                    steps_per_epoch,
                    stop_threshold=FLAGS.stop_threshold,
                    steps=FLAGS.num_eval_images // FLAGS.eval_batch_size,
                    every_n_iter=steps_per_epoch * 4))
            if FLAGS.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, steps_per_epoch * 4)))
            resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                    max_steps=FLAGS.train_steps,
                                    hooks=hooks)
        else:
            current_step = estimator._load_global_step_from_checkpoint_dir(
                FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long
            steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size

            tf.logging.info(
                'Training for %d steps (%.2f epochs in total). Current'
                ' step %d.', FLAGS.train_steps,
                FLAGS.train_steps / steps_per_epoch, current_step)

            start_timestamp = time.time(
            )  # This time will include compilation time

            assert FLAGS.mode == 'train_and_eval'
            success = False
            while current_step < FLAGS.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                mlperf_log.resnet_print(key=mlperf_log.TRAIN_EPOCH,
                                        value=current_step // steps_per_epoch)
                resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                        max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                mlperf_log.resnet_print(key=mlperf_log.EVAL_START)
                eval_results = resnet_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
                mlperf_log.resnet_print(key=mlperf_log.EVAL_SIZE,
                                        value=FLAGS.num_eval_images)
                mlperf_log.resnet_print(key=mlperf_log.EVAL_STOP)
                mlperf_log.resnet_print(
                    key=mlperf_log.EVAL_ACCURACY,
                    value={
                        'epoch': max(0, current_step // steps_per_epoch - 1),
                        'value': float(eval_results['top_1_accuracy'])
                    })
                mlperf_log.resnet_print(key=mlperf_log.EVAL_TARGET,
                                        value=FLAGS.stop_threshold)
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)
                if eval_results['top_1_accuracy'] >= FLAGS.stop_threshold:
                    success = True
                    mlperf_log.resnet_print(key=mlperf_log.RUN_STOP,
                                            value={'success': 'true'})
                    break

            elapsed_time = int(time.time() - start_timestamp)

            if not success:
                mlperf_log.resnet_print(key=mlperf_log.RUN_STOP,
                                        value={'success': 'false'})

            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                FLAGS.train_steps, elapsed_time)

        if FLAGS.export_dir is not None:
            # The guide to serve an exported TensorFlow model is at:
            #    https://www.tensorflow.org/serving/serving_basic
            tf.logging.info('Starting to export model.')
            resnet_classifier.export_savedmodel(
                export_dir_base=FLAGS.export_dir,
                serving_input_receiver_fn=imagenet_input.image_serving_input_fn
            )

    if FLAGS.use_train_runner and FLAGS.mode == 'train':
        trunner.shutdown()

    if FLAGS.mode != 'train':
        mlperf_log.resnet_print(key=mlperf_log.RUN_FINAL)
Example #7
def main(unused_argv):

    input_image_size = FLAGS.input_image_size
    if not input_image_size:
        if FLAGS.model_name.startswith('efficientnet-edgetpu'):
            _, _, input_image_size, _ = efficientnet_edgetpu_builder.efficientnet_edgetpu_params(
                FLAGS.model_name)
        elif FLAGS.model_name.startswith('efficientnet'):
            _, _, input_image_size, _ = efficientnet_builder.efficientnet_params(
                FLAGS.model_name)
        else:
            raise ValueError(
                'input_image_size must be set except for EfficientNet')

    # For imagenet dataset, include background label if number of output classes
    # is 1001
    include_background_label = (FLAGS.num_label_classes == 1001)

    if FLAGS.tpu or FLAGS.use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        tpu_cluster_resolver = None

    if FLAGS.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)
    config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=tf.ConfigProto(
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True))),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
            .PER_HOST_V2))  # pylint: disable=line-too-long
    # Initializes model parameters.
    params = dict(steps_per_epoch=FLAGS.num_train_images /
                  FLAGS.train_batch_size,
                  use_bfloat16=FLAGS.use_bfloat16)
    est = tf.contrib.tpu.TPUEstimator(use_tpu=FLAGS.use_tpu,
                                      model_fn=model_fn,
                                      config=config,
                                      train_batch_size=FLAGS.train_batch_size,
                                      eval_batch_size=FLAGS.eval_batch_size,
                                      export_to_tpu=FLAGS.export_to_tpu,
                                      params=params)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    def build_imagenet_input(is_training):
        """Generate ImageNetInput for training and eval."""
        if FLAGS.bigtable_instance:
            tf.logging.info('Using Bigtable dataset, table %s',
                            FLAGS.bigtable_table)
            select_train, select_eval = _select_tables_from_flags()
            return imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=FLAGS.use_bfloat16,
                transpose_input=FLAGS.transpose_input,
                selection=select_train if is_training else select_eval,
                include_background_label=include_background_label,
                autoaugment_name=FLAGS.autoaugment_name)
        else:
            if FLAGS.data_dir == FAKE_DATA_DIR:
                tf.logging.info('Using fake dataset.')
            else:
                tf.logging.info('Using dataset: %s', FLAGS.data_dir)

            return imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=FLAGS.transpose_input,
                cache=FLAGS.use_cache and is_training,
                image_size=input_image_size,
                num_parallel_calls=FLAGS.num_parallel_calls,
                use_bfloat16=FLAGS.use_bfloat16,
                include_background_label=include_background_label,
                autoaugment_name=FLAGS.autoaugment_name)

    imagenet_train = build_imagenet_input(is_training=True)
    imagenet_eval = build_imagenet_input(is_training=False)

    if FLAGS.mode == 'eval':
        eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                eval_results = est.evaluate(input_fn=imagenet_eval.input_fn,
                                            steps=eval_steps,
                                            checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)
                utils.archive_ckpt(eval_results,
                                   eval_results['top_1_accuracy'], ckpt)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

        if FLAGS.export_dir:
            export(est, FLAGS.export_dir, input_image_size)
    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(
            FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long

        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', FLAGS.train_steps,
            FLAGS.train_steps / params['steps_per_epoch'], current_step)

        start_timestamp = time.time(
        )  # This time will include compilation time

        if FLAGS.mode == 'train':
            hooks = []
            if FLAGS.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, FLAGS.iterations_per_loop)))
            est.train(input_fn=imagenet_train.input_fn,
                      max_steps=FLAGS.train_steps,
                      hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            while current_step < FLAGS.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                est.train(input_fn=imagenet_train.input_fn,
                          max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                eval_results = est.evaluate(input_fn=imagenet_eval.input_fn,
                                            steps=FLAGS.num_eval_images //
                                            FLAGS.eval_batch_size)
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)
                ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
                utils.archive_ckpt(eval_results,
                                   eval_results['top_1_accuracy'], ckpt)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                FLAGS.train_steps, elapsed_time)
            if FLAGS.export_dir:
                export(est, FLAGS.export_dir, input_image_size)
Example #8
def main(unused_argv):
  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or FLAGS.use_tpu) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  if FLAGS.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)
  config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
          .PER_HOST_V2))  # pylint: disable=line-too-long

  resnet_classifier = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn,
      config=config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      #predict_batch_size=PREDICT_BATCH_SIZE,
      export_to_tpu=FLAGS.export_to_tpu)

  assert FLAGS.precision == 'bfloat16' or FLAGS.precision == 'float32', (
      'Invalid value for --precision flag; must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', FLAGS.precision)
  use_bfloat16 = FLAGS.precision == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    
    imagenet_train, imagenet_eval = [imagenet_input.ImageNetBigtableInput(
        is_training=is_training,
        use_bfloat16=use_bfloat16,
        transpose_input=FLAGS.transpose_input,
        selection=selection) for (is_training, selection) in
                                     [(True, select_train),
                                      (False, select_eval)]]
    
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            prices_dir=FLAGS.prices_dir,
            predict_dir=FLAGS.predict_dir,
            transpose_input=FLAGS.transpose_input,
            cache=FLAGS.use_cache and is_training,
            price_count=PRICE_COUNT,
            num_parallel_calls=FLAGS.num_parallel_calls,
            use_bfloat16=use_bfloat16) for is_training in [True, False]
    ]

  steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size
  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  if FLAGS.mode == 'eval':

    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)

        # Terminate eval job when final checkpoint is reached
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= FLAGS.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d', current_step)
          break

      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint', ckpt)

  else:   # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long
    tf.logging.info('model_dir=%s,steps=%d' % (FLAGS.model_dir,current_step))
    steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size

    tf.logging.info('Training for %d steps (%.2f epochs in total). Current'
                    ' step %d.',
                    FLAGS.train_steps,
                    FLAGS.train_steps / steps_per_epoch,
                    current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if FLAGS.use_async_checkpointing:
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, FLAGS.iterations_per_loop)))
      if FLAGS.profile_every_n_steps > 0:
        hooks.append(
            tpu_profiler_hook.TPUProfilerHook(
                save_steps=FLAGS.profile_every_n_steps,
                output_dir=FLAGS.model_dir, tpu=FLAGS.tpu)
            )
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn,
          max_steps=FLAGS.train_steps,
          hooks=hooks)

    elif FLAGS.mode == 'train_and_eval':
      # assert FLAGS.mode == 'train_and_eval'
      while current_step < FLAGS.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        resnet_classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      FLAGS.train_steps, elapsed_time)
    else: # FLAGS.mode == 'predict'
      
      price_file_pattern = os.path.join(
        FLAGS.prices_dir, 'price-*.csv')
      while True:
        time.sleep(1)
        price_files = glob.glob(price_file_pattern)
        if len(price_files) == 0:
          continue
        tf.logging.info('Starting to predict.')
        for price_file_item in price_files:
          with open(price_file_item, "r") as fcsv:
            csvreader = csv.reader(fcsv, delimiter=",")
            price_batch_size = len(list(csvreader))
            
          # price_batch_size = PREDICT_BATCH_SIZE
          
          if price_batch_size == 0:
            continue
          #predictions = next(resnet_classifier.predict(
          #  input_fn=lambda params : imagenet_eval.predict_input_fn(params, price_batch_size),
          #  ), None)
          predictions = resnet_classifier.predict(
            input_fn=lambda params : imagenet_eval.predict_input_fn(params, price_batch_size, os.path.basename(price_file_item)),
            )
          
          tf.logging.info("predictions2 = %s" % predictions)
          
          # Output predictions to predict-0001.csv BorisTown 
          predict_filename_part = os.path.join(FLAGS.predict_dir, 'part-0001.part')
          predict_filename_csv = os.path.join(FLAGS.predict_dir, 'predict-0001.csv')
          if len(price_files) > 1:
            dirname = re.findall(r"price-(.+?)\.csv",price_file_item)[0]
            dirpath = os.path.join(FLAGS.predict_dir, dirname)
            if not os.path.exists(dirpath):
              os.makedirs(dirpath)
            predict_filename_part = os.path.join(dirpath, 'part-0001.part')
            predict_filename_csv = os.path.join(dirpath, 'predict-0001.csv')
          predict_file = open(predict_filename_part, "w")
          predict_file.truncate()
          predict_line = ''
          
          #outarray = np.zeros([price_batch_size, MAX_CASE*LABEL_COUNT])
          outarray = np.zeros([price_batch_size, LABEL_COUNT])
          
          for case_index, pred_item in enumerate(predictions):
            #tf.logging.info("pred_item_probabilities=%s" % (pred_item['probabilities']))
            #predict_line = ''
            for batch_index, pred_operation in enumerate(pred_item['probabilities']):
              #tf.logging.info("pred_operation.shape=%s" % (pred_operation.shape))
              for label_index in range(LABEL_COUNT):
                #predict_line += str(pred_operation[k])
                #tf.logging.info("prediction op:%s" % (pred_operation[label_index]))
                outarray[batch_index][case_index*LABEL_COUNT+label_index] = pred_operation[label_index]
             #predict_file.write(predict_line+'\n')
          #predict_file.close()
          
          #tf.logging.info('predict_line = %s' % (predict_line))
          for pred_row in outarray:
            predict_line = ''
            for pred_col in pred_row:
              if predict_line != '':
                predict_line += ','
              predict_line += str(pred_col)
            predict_file.write(predict_line+'\n')
            tf.logging.info('%s' % (predict_line))
          predict_file.close()
          os.rename(predict_filename_part, predict_filename_csv)
          if predict_line != '':
            #for price_file in price_files:
            tf.logging.info('Removing ' + price_file_item)
            price_file_new = price_file_item.replace("price-", "backup-")
            os.rename(price_file_item, price_file_new)
            
    if FLAGS.export_dir is not None and FLAGS.mode != 'predict':
      # The guide to serve an exported TensorFlow model is at:
      #    https://www.tensorflow.org/serving/serving_basic
      tf.logging.info('Starting to export model.')
      resnet_classifier.export_saved_model(
          export_dir_base=FLAGS.export_dir,
          serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
Example #9
  def initialize_session(self):
    """Initializes a tf Session."""
    if ENABLE_TF_OPTIMIZATIONS:
      self.sess = tf.Session()
    else:
      rewriter_config = rewriter_config_pb2.RewriterConfig(
          disable_model_pruning=True,
          constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
          arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          remapping=rewriter_config_pb2.RewriterConfig.OFF,
          shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          function_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
          loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)
      graph_options = tf.GraphOptions(rewrite_options=rewriter_config)
      session_config = tf.ConfigProto(graph_options=graph_options)
      self.sess = tf.Session(config=session_config)

    # Restore or initialize the variables.
    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())
    if self.learner_config.checkpoint_for_eval:
      # Requested a specific checkpoint.
      self.saver.restore(self.sess, self.learner_config.checkpoint_for_eval)
      tf.logging.info(
          'Restored checkpoint: %s' % self.learner_config.checkpoint_for_eval)
    else:
      # Continue from the latest checkpoint if one exists.
      # This handles fault-tolerance.
      latest_checkpoint = None
      if self.checkpoint_dir is not None:
        latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_dir)
      if latest_checkpoint:
        self.saver.restore(self.sess, latest_checkpoint)
        tf.logging.info('Restored checkpoint: %s' % latest_checkpoint)
      else:
        tf.logging.info('No previous checkpoint.')
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

    # For episodic models, potentially use pretrained weights at the start of
    # training. If this happens it will overwrite the embedding weights, but
    # taking care to not restore the Adam parameters.
    if self.learner_config.pretrained_checkpoint and not self.sess.run(
        tf.train.get_global_step()):
      self.saver.restore(self.sess, self.learner_config.pretrained_checkpoint)
      tf.logging.info(
          'Restored checkpoint: %s' % self.learner_config.pretrained_checkpoint)
      # We only want the embedding weights of the checkpoint we just restored.
      # So we re-initialize everything that's not an embedding weight. Also,
      # since this episodic finetuning procedure is a different optimization
      # problem than the original training of the baseline whose embedding
      # weights are re-used, we do not reload ADAM's variables and instead learn
      # them from scratch.
      vars_to_reinit, embedding_var_names, vars_to_reinit_names = [], [], []
      for var in tf.global_variables():
        if (any(keyword in var.name for keyword in EMBEDDING_KEYWORDS) and
            'adam' not in var.name.lower()):
          embedding_var_names.append(var.name)
          continue
        vars_to_reinit.append(var)
        vars_to_reinit_names.append(var.name)
      tf.logging.info(
          'Initializing all variables except for %s.' % embedding_var_names)
      self.sess.run(tf.variables_initializer(vars_to_reinit))
      tf.logging.info('Re-initialized vars %s.' % vars_to_reinit_names)
Example #10
        dataset = dataset.ImagenetData(data_location)
        preprocessor = image_preprocessing.ImagePreprocessor(
            input_height,
            input_width,
            batch_size,
            1,  # device count
            tf.float32,  # data_type for input fed to the graph
            train=False,  # doing inference
            resize_method='crop')
        images, labels = preprocessor.minibatch(dataset, subset='validation')

    graph = load_graph(model_file)
    input_tensor = graph.get_tensor_by_name(input_layer + ":0")
    output_tensor = graph.get_tensor_by_name(output_layer + ":0")

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)

    config = tf.compat.v1.ConfigProto()
    config.inter_op_parallelism_threads = num_inter_threads
    config.intra_op_parallelism_threads = num_intra_threads

    config.graph_options.rewrite_options.remapping = (
        rewriter_config_pb2.RewriterConfig.OFF)

    total_accuracy1, total_accuracy5 = (0.0, 0.0)
    num_processed_images = 0
    num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \
                              - num_processed_images
    top1 = 0
    with tf.compat.v1.Session(graph=data_graph) as sess:  ###
        sess_graph = tf.compat.v1.Session(graph=graph, config=config)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/MNIST_data/data/")

from tensorflow.core.protobuf import rewriter_config_pb2

rewrite_options = rewriter_config_pb2.RewriterConfig(
    disable_model_pruning=True,
    constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
    dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
    layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
    arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
    min_graph_nodes=-1,
    memory_optimization=rewriter_config_pb2.RewriterConfig.SWAPPING_HEURISTICS)

graph_options = tf.GraphOptions(
    rewrite_options=rewrite_options)  #, infer_shapes=True)
config = tf.ConfigProto(graph_options=graph_options,
                        allow_soft_placement=True,
                        log_device_placement=True)
config.gpu_options.allow_growth = True

#run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#run_metadata = tf.RunMetadata()

#graph = tf.get_default_graph()
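The commented-out RunOptions/RunMetadata lines above hint at how this kind of memory-optimizer configuration is usually checked: trace one step and inspect what actually executed. A minimal sketch, assuming the `config` built above plus a `train_op` (and any feed_dict it needs) from the elided model code:

run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # `train_op` is assumed to be defined by the (elided) model-building code.
    sess.run(train_op, options=run_options, run_metadata=run_metadata)
    # step_stats records per-device node execution; any swap nodes inserted by
    # the memory optimizer show up in this trace.
    for dev_stats in run_metadata.step_stats.dev_stats:
        print(dev_stats.device, len(dev_stats.node_stats), 'nodes')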
Example #12
0
  def _no_rewrite_session_config(self):
    rewriter_config = rewriter_config_pb2.RewriterConfig(
        pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    return config_pb2.ConfigProto(graph_options=graph_options)

  def _no_rewrite_session_config(self):
    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True)
    graph_options = config_pb2.GraphOptions(
        rewrite_options=rewriter_config)
    return config_pb2.ConfigProto(graph_options=graph_options)
  tf.get_variable_scope().reuse_variables()




correct_prediction = tf.equal(tf.argmax(logit, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Training algorithm
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

run_metadata = tf.RunMetadata()

mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())

rewrite_options = rewriter_config_pb2.RewriterConfig(
        memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)

# Grappler derives its fetch nodes from the metagraph; see the sketch after
# this example for the calling convention in later TF 1.x releases.
graph = tf_optimizer.OptimizeGraph(rewrite_options, mg)

session_config = config_pb2.ConfigProto(graph_options=graph_options)


# Training steps
with tf.Session(config=session_config) as sess:
  sess.run(tf.global_variables_initializer())

  max_steps = 10
  latency = []
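A note on the explicit Grappler invocation in this example: Grappler derives its fetch nodes from the metagraph's TRAIN_OP collection, and in later TF 1.x releases tf_optimizer.OptimizeGraph expects a full ConfigProto rather than a bare RewriterConfig (older releases, as in the snippet above, took the RewriterConfig directly). A minimal self-contained sketch under those assumptions:

import tensorflow.compat.v1 as tf
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import meta_graph
from tensorflow.python.grappler import tf_optimizer

with tf.Graph().as_default() as g:
  a = tf.Variable(10.0, name='a')
  b = tf.Variable(20.0, name='b')
  c = tf.add_n([a, b], name='c')
  # Grappler needs fetch nodes; it reads them from the TRAIN_OP collection.
  tf.add_to_collection(tf.GraphKeys.TRAIN_OP, c)
  mg = meta_graph.create_meta_graph_def(graph=g)

rewrite_options = rewriter_config_pb2.RewriterConfig(
    memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
config = config_pb2.ConfigProto(
    graph_options=config_pb2.GraphOptions(rewrite_options=rewrite_options))
# Later 1.x releases take a ConfigProto here; earlier ones accepted the
# RewriterConfig directly, as in the snippet above.
optimized_graph_def = tf_optimizer.OptimizeGraph(config, mg)
print(len(optimized_graph_def.node), 'nodes after Grappler')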
Example #15
0
def main(unused_argv):
    # tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
    #     FLAGS.tpu if (FLAGS.tpu or FLAGS.use_tpu) else '',
    #     zone=FLAGS.tpu_zone,
    #     project=FLAGS.gcp_project)

    if FLAGS.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)

    NUM_GPUS = len(get_available_gpus())
    distribution = tf.contrib.distribute.MirroredStrategy(num_gpus=NUM_GPUS)
    gpu_options = tf.GPUOptions(allow_growth=True)

    # config = tf.contrib.tpu.RunConfig(
    #     # cluster=tpu_cluster_resolver,
    #     model_dir=FLAGS.model_dir,
    #     save_checkpoints_steps=save_checkpoints_steps,
    #     log_step_count_steps=FLAGS.log_step_count_steps,
    #     session_config=tf.ConfigProto(
    #         graph_options=tf.GraphOptions(
    #             rewrite_options=rewriter_config_pb2.RewriterConfig(
    #                 disable_meta_optimizer=True)), gpu_options=gpu_options),
    #     train_distribute=distribution,
    #     # tpu_config=tf.contrib.tpu.TPUConfig(
    #     #     iterations_per_loop=FLAGS.iterations_per_loop,
    #     #     per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
    #     #     .PER_HOST_V2)
    # )  # pylint: disable=line-too-long
    config = tf.estimator.RunConfig(
        # cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      graph_options=tf.GraphOptions(
                                          rewrite_options=rewriter_config_pb2.RewriterConfig(
                                              disable_meta_optimizer=True)), gpu_options=gpu_options),
        train_distribute=distribution,
        # log_step_count_steps=None,
        # save_summary_steps=None
        # tpu_config=tf.contrib.tpu.TPUConfig(
        #     iterations_per_loop=FLAGS.iterations_per_loop,
        #     per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
        #     .PER_HOST_V2)
    )  # pylint: disable=line-too-long

    # Initializes model parameters.
    # params = dict(steps_per_epoch=FLAGS.num_train_images / FLAGS.train_batch_size)
    # nas_est = tf.contrib.tpu.TPUEstimator(
    #     use_tpu=FLAGS.use_tpu,
    #     model_fn=nas_model_fn,
    #     config=config,
    #     train_batch_size=FLAGS.train_batch_size,
    #     eval_batch_size=FLAGS.eval_batch_size,
    #     export_to_tpu=FLAGS.export_to_tpu,
    #     params=params)
    params = dict(steps_per_epoch=FLAGS.num_train_images /
                  FLAGS.train_batch_size,
                  batch_size=FLAGS.train_batch_size)
    nas_est = tf.estimator.Estimator(model_fn=nas_model_fn,
                                     config=config,
                                     params=params)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=False,
                transpose_input=FLAGS.transpose_input,
                selection=selection)
            for (is_training,
                 selection) in [(True, select_train), (False, select_eval)]
        ]
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=FLAGS.transpose_input,
                cache=FLAGS.use_cache and is_training,
                image_size=FLAGS.input_image_size,
                num_parallel_calls=FLAGS.num_parallel_calls,
                use_bfloat16=False) for is_training in [True, False]
        ]

    if FLAGS.mode == 'eval':
        eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                eval_results = nas_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

        if FLAGS.export_dir:
            export(nas_est, FLAGS.export_dir, FLAGS.post_quantize)
    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(
            FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long

        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', FLAGS.train_steps,
            FLAGS.train_steps / params['steps_per_epoch'], current_step)

        start_timestamp = time.time(
        )  # This time will include compilation time

        if FLAGS.mode == 'train':
            hooks = []
            if FLAGS.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, FLAGS.iterations_per_loop)))
            nas_est.train(input_fn=imagenet_train.input_fn,
                          max_steps=FLAGS.train_steps,
                          hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            while current_step < FLAGS.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                nas_est.train(input_fn=imagenet_train.input_fn,
                              max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                eval_results = nas_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                FLAGS.train_steps, elapsed_time)
            if FLAGS.export_dir:
                export(nas_est, FLAGS.export_dir, FLAGS.post_quantize)
def main(unused_argv):
    params = hyperparameters.get_hyperparameters(FLAGS.default_hparams_file,
                                                 FLAGS.hparams_file, FLAGS,
                                                 FLAGS.hparams)
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu if (FLAGS.tpu or params['use_tpu']) else '',
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)

    if params['use_async_checkpointing']:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(2500, params['iterations_per_loop'])
    config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=get_model_dir(params),
        save_checkpoints_steps=save_checkpoints_steps,
        keep_checkpoint_max=None,  # Keep all checkpoints.
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=tf.ConfigProto(
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True))),
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=params['iterations_per_loop'],
            num_shards=params['num_cores'],
            # copybara:strip_begin
            tpu_job_name=FLAGS.tpu_job_name,
            # copybara:strip_end
            per_host_input_for_training=contrib_tpu.InputPipelineConfig
            .PER_HOST_V2))  # pylint: disable=line-too-long

    resnet_classifier = contrib_tpu.TPUEstimator(
        use_tpu=params['use_tpu'],
        model_fn=resnet_model_fn,
        config=config,
        params=params,
        train_batch_size=params['train_batch_size'],
        eval_batch_size=params['eval_batch_size'],
        export_to_tpu=FLAGS.export_to_tpu)

    # copybara:strip_begin
    if FLAGS.xla_compile:
        resnet_classifier = contrib_tpu.TPUEstimator(
            use_tpu=params['use_tpu'],
            model_fn=xla.estimator_model_fn(resnet_model_fn),
            config=config,
            params=params,
            train_batch_size=params['train_batch_size'],
            eval_batch_size=params['eval_batch_size'],
            export_to_tpu=FLAGS.export_to_tpu)
    # copybara:strip_end
    assert (params['precision'] == 'bfloat16' or params['precision']
            == 'float32'), ('Invalid value for precision parameter; '
                            'must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', params['precision'])
    use_bfloat16 = params['precision'] == 'bfloat16'

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train = imagenet_input.ImageNetBigtableInput(
            is_training=True,
            use_bfloat16=use_bfloat16,
            transpose_input=params['transpose_input'],
            selection=select_train)
        imagenet_eval = imagenet_input.ImageNetBigtableInput(
            is_training=False,
            use_bfloat16=use_bfloat16,
            transpose_input=params['transpose_input'],
            selection=select_eval)
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=params['transpose_input'],
                cache=params['use_cache'] and is_training,
                image_size=params['image_size'],
                num_parallel_calls=params['num_parallel_calls'],
                use_bfloat16=use_bfloat16) for is_training in [True, False]
        ]

    steps_per_epoch = params['num_train_images'] // params['train_batch_size']
    eval_steps = params['num_eval_images'] // params['eval_batch_size']

    if FLAGS.mode == 'eval':

        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                get_model_dir(params), timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                eval_results = resnet_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= params['train_steps']:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

    elif FLAGS.mode == 'eval_igt':
        # IGT evaluation mode. Evaluate metrics for the desired parameters
        # (true or shifted) on the desired dataset (train or eval). Note that
        # train is still with data augmentation.

        # Get checkpoint file names.
        index_files = tf.gfile.Glob(
            os.path.join(get_model_dir(params), 'model.ckpt-*.index'))
        checkpoints = [fn[:-len('.index')] for fn in index_files]
        # Need to sort them to get proper tensorboard plotting (increasing event
        # timestamps correspond to increasing steps).
        checkpoint_steps = []
        for ckpt in checkpoints:
            tf.logging.info(ckpt)
            step_match = re.match(r'.*model.ckpt-([0-9]*)', ckpt)
            checkpoint_steps.append(int(step_match.group(1)))
        checkpoints = [
            ckpt for _, ckpt in sorted(zip(checkpoint_steps, checkpoints))
        ]
        tf.logging.info('There are {} checkpoints'.format(len(checkpoints)))
        tf.logging.info(', '.join(checkpoints))

        # Keep track of the last processed checkpoint (fault tolerance).
        analysis_state_path = os.path.join(
            get_model_dir(params),
            'analysis_state_' + FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
        next_analysis_index = 0
        if tf.gfile.Exists(analysis_state_path):
            with tf.gfile.Open(analysis_state_path) as fd:
                next_analysis_index = int(fd.read())

        # Process each checkpoint.
        while next_analysis_index < len(checkpoints):
            tf.logging.info(
                'Next analysis index: {}'.format(next_analysis_index))
            ckpt_path = checkpoints[next_analysis_index]
            tf.logging.info('Starting to evaluate: {}.'.format(ckpt_path))
            start_timestamp = time.time(
            )  # This time will include compilation time

            if FLAGS.igt_eval_set == 'train':
                the_input_fn = imagenet_train.input_fn
                the_steps = steps_per_epoch
            elif FLAGS.igt_eval_set == 'eval':
                the_input_fn = imagenet_eval.input_fn
                the_steps = eval_steps
            else:
                raise ValueError('Unsupported igt_eval_set')

            eval_results = resnet_classifier.evaluate(
                input_fn=the_input_fn,
                steps=the_steps,
                checkpoint_path=ckpt_path,
                name=FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                            eval_results, elapsed_time)

            next_analysis_index += 1
            file_io.atomic_write_string_to_file(analysis_state_path,
                                                str(next_analysis_index))

    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(
            get_model_dir(params))  # pylint:disable=protected-access,g-line-too-long
        steps_per_epoch = params['num_train_images'] // params[
            'train_batch_size']
        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', params['train_steps'],
            params['train_steps'] / steps_per_epoch, current_step)

        start_timestamp = time.time(
        )  # This time will include compilation time

        if FLAGS.mode == 'train':
            hooks = []
            if params['use_async_checkpointing']:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=get_model_dir(params),
                        save_steps=max(2500, params['iterations_per_loop'])))
            resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                    max_steps=params['train_steps'],
                                    hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            while current_step < params['train_steps']:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      params['train_steps'])
                resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                        max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                eval_results = resnet_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=params['num_eval_images'] //
                    params['eval_batch_size'])
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                params['train_steps'], elapsed_time)

        if FLAGS.export_dir is not None:
            # The guide to serving an exported TensorFlow model is at:
            #    https://www.tensorflow.org/serving/serving_basic
            tf.logging.info('Starting to export model.')
            unused_export_path = resnet_classifier.export_saved_model(
                export_dir_base=FLAGS.export_dir,
                serving_input_receiver_fn=imagenet_input.image_serving_input_fn
            )
def no_rewrite_session_config():
    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    return config_pb2.ConfigProto(graph_options=graph_options)
Example #18
0
def main():
    np.random.seed(0)
    tf.set_random_seed(0)

    dtype = np.float32
    # 64-bit doesn't help much, search for 64-bit in
    # https://www.wolframcloud.com/objects/5f297f41-30f7-4b1b-972c-cac8d1f8d8e4
    u.default_dtype = dtype
    machine_epsilon = np.finfo(dtype).eps  # 1e-7 or 1e-16
    train_images = load_MNIST.load_MNIST_images('data/train-images-idx3-ubyte')
    dsize = 10000
    patches = train_images[:, :dsize]
    fs = [dsize, 28 * 28, 196, 28 * 28]

    # values from deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial
    X0 = patches
    lambda_ = 3e-3
    rho = tf.constant(0.1, dtype=dtype)
    beta = 3
    W0f = W_uniform(fs[2], fs[3])

    def f(i):
        return fs[i + 1]  # W[i] has shape f[i] x f[i-1]

    dsize = f(-1)
    n = len(fs) - 2

    # helper to create variables with numpy or TF initial value
    init_dict = {}  # {var_placeholder: init_value}
    vard = {}  # {var: util.VarInfo}

    def init_var(val, name, trainable=False, noinit=False):
        if isinstance(val, tf.Tensor):
            collections = [] if noinit else None
            var = tf.Variable(val, name=name, collections=collections)
        else:
            val = np.array(val)
            assert u.is_numeric(val), "Unknown type"
            holder = tf.placeholder(dtype,
                                    shape=val.shape,
                                    name=name + "_holder")
            var = tf.Variable(holder, name=name, trainable=trainable)
            init_dict[holder] = val
        var_p = tf.placeholder(var.dtype, var.shape)
        var_setter = var.assign(var_p)
        vard[var] = u.VarInfo(var_setter, var_p)
        return var

    lr = init_var(0.2, "lr")
    if purely_linear:  # need lower LR without sigmoids
        lr = init_var(.02, "lr")

    Wf = init_var(W0f, "Wf", True)
    Wf_copy = init_var(W0f, "Wf_copy", True)
    W = u.unflatten(Wf, fs[1:])  # perftodo: this creates transposes
    X = init_var(X0, "X")
    W.insert(0, X)

    def sigmoid(x):
        if not purely_linear:
            return tf.sigmoid(x)
        else:
            return tf.identity(x)

    def d_sigmoid(y):
        if not purely_linear:
            return y * (1 - y)
        else:
            return 1

    def kl(x, y):
        return x * tf.log(x / y) + (1 - x) * tf.log((1 - x) / (1 - y))

    def d_kl(x, y):
        return (1 - x) / (1 - y) - x / y

    # A[i] = activations needed to compute gradient of W[i]
    # A[n+1] = network output
    A = [None] * (n + 2)

    # A[0] is just for shape checks, assert fail on run
    # tf.assert always fails because of static assert
    # fail_node = tf.assert_equal(1, 0, message="too huge")
    fail_node = tf.Print(0, [0], "fail, this must never run")
    with tf.control_dependencies([fail_node]):
        A[0] = u.Identity(dsize, dtype=dtype)
    A[1] = W[0]
    for i in range(1, n + 1):
        A[i + 1] = sigmoid(W[i] @ A[i])

    # reconstruction error and sparsity error
    err = (A[3] - A[1])
    rho_hat = tf.reduce_sum(A[2], axis=1, keep_dims=True) / dsize

    # B[i] = backprops needed to compute gradient of W[i]
    # B2[i] = backprops from sampled labels needed for natural gradient
    B = [None] * (n + 1)
    B2 = [None] * (n + 1)
    B[n] = err * d_sigmoid(A[n + 1])
    sampled_labels_live = tf.random_normal((f(n), f(-1)), dtype=dtype, seed=0)
    sampled_labels = init_var(sampled_labels_live,
                              "sampled_labels",
                              noinit=True)
    B2[n] = sampled_labels * d_sigmoid(A[n + 1])
    for i in range(n - 1, -1, -1):
        backprop = t(W[i + 1]) @ B[i + 1]
        backprop2 = t(W[i + 1]) @ B2[i + 1]
        if i == 1 and not drop_sparsity:
            backprop += beta * d_kl(rho, rho_hat)
            backprop2 += beta * d_kl(rho, rho_hat)
        B[i] = backprop * d_sigmoid(A[i + 1])
        B2[i] = backprop2 * d_sigmoid(A[i + 1])

    # dW[i] = gradient of W[i]
    dW = [None] * (n + 1)
    pre_dW = [None] * (n + 1)  # preconditioned dW
    pre_dW_stable = [None] * (n + 1)  # preconditioned stable dW

    cov_A = [None] * (n + 1)  # covariance of activations[i]
    cov_B2 = [None] * (n + 1)  # covariance of synthetic backprops[i]
    vars_svd_A = [None] * (n + 1)
    vars_svd_B2 = [None] * (n + 1)
    for i in range(1, n + 1):
        cov_A[i] = init_var(A[i] @ t(A[i]) / dsize, "cov_A%d" % (i, ))
        cov_B2[i] = init_var(B2[i] @ t(B2[i]) / dsize, "cov_B2%d" % (i, ))
        vars_svd_A[i] = u.SvdWrapper(cov_A[i], "svd_A_%d" % (i, ))
        vars_svd_B2[i] = u.SvdWrapper(cov_B2[i], "svd_B2_%d" % (i, ))
        if use_tikhonov:
            whitened_A = u.regularized_inverse2(vars_svd_A[i], L=Lambda) @ A[i]
        else:
            whitened_A = u.pseudo_inverse2(vars_svd_A[i]) @ A[i]
        if use_tikhonov:
            whitened_B2 = u.regularized_inverse2(vars_svd_B2[i],
                                                 L=Lambda) @ B[i]
        else:
            whitened_B2 = u.pseudo_inverse2(vars_svd_B2[i]) @ B[i]
        whitened_A_stable = u.pseudo_inverse_sqrt2(vars_svd_A[i]) @ A[i]
        whitened_B2_stable = u.pseudo_inverse_sqrt2(vars_svd_B2[i]) @ B[i]
        pre_dW[i] = (whitened_B2 @ t(whitened_A)) / dsize
        pre_dW_stable[i] = (whitened_B2_stable @ t(whitened_A_stable)) / dsize
        dW[i] = (B[i] @ t(A[i])) / dsize

    # Loss function
    reconstruction = u.L2(err) / (2 * dsize)
    sparsity = beta * tf.reduce_sum(kl(rho, rho_hat))
    L2 = (lambda_ / 2) * (u.L2(W[1]) + u.L2(W[1]))  # NOTE: W[1] appears twice; W[2] was probably intended

    loss = reconstruction
    if not drop_l2:
        loss = loss + L2
    if not drop_sparsity:
        loss = loss + sparsity

    grad_live = u.flatten(dW[1:])
    pre_grad_live = u.flatten(pre_dW[1:])  # fisher preconditioned gradient
    pre_grad_stable_live = u.flatten(
        pre_dW_stable[1:])  # sqrt fisher preconditioned grad
    grad = init_var(grad_live, "grad")
    pre_grad = init_var(pre_grad_live, "pre_grad")
    pre_grad_stable = init_var(pre_grad_stable_live, "pre_grad_stable")

    update_params_op = Wf.assign(Wf - lr * pre_grad).op
    update_params_stable_op = Wf.assign(Wf - lr * pre_grad_stable).op
    save_params_op = Wf_copy.assign(Wf).op
    pre_grad_dot_grad = tf.reduce_sum(pre_grad * grad)
    pre_grad_stable_dot_grad = tf.reduce_sum(pre_grad_stable * grad)
    grad_norm = tf.reduce_sum(grad * grad)
    pre_grad_norm = u.L2(pre_grad)
    pre_grad_stable_norm = u.L2(pre_grad_stable)

    def dump_svd_info(step):
        """Dump singular values and gradient values in those coordinates."""
        for i in range(1, n + 1):
            svd = vars_svd_A[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            util.dump(s0, "A_%d_%d" % (i, step))
            A0 = A[i].eval()
            At0 = v0.T @ A0
            util.dump(A0 @ A0.T, "Acov_%d_%d" % (i, step))
            util.dump(At0 @ At0.T, "Atcov_%d_%d" % (i, step))
            util.dump(s0, "As_%d_%d" % (i, step))

        for i in range(1, n + 1):
            svd = vars_svd_B2[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            util.dump(s0, "B2_%d_%d" % (i, step))
            B0 = B[i].eval()
            Bt0 = v0.T @ B0
            util.dump(B0 @ B0.T, "Bcov_%d_%d" % (i, step))
            util.dump(Bt0 @ Bt0.T, "Btcov_%d_%d" % (i, step))
            util.dump(s0, "Bs_%d_%d" % (i, step))

    def advance_batch():
        sess.run(sampled_labels.initializer)  # new labels for next call

    def update_covariances():
        ops_A = [cov_A[i].initializer for i in range(1, n + 1)]
        ops_B2 = [cov_B2[i].initializer for i in range(1, n + 1)]
        sess.run(ops_A + ops_B2)

    def update_svds():
        if whitening_mode > 1:
            vars_svd_A[2].update()
        if whitening_mode > 2:
            vars_svd_B2[2].update()
        if whitening_mode > 3:
            vars_svd_B2[1].update()

    def init_svds():
        """Initialize our SVD to identity matrices."""
        ops = []
        for i in range(1, n + 1):
            ops.extend(vars_svd_A[i].init_ops)
            ops.extend(vars_svd_B2[i].init_ops)
        sess = tf.get_default_session()
        sess.run(ops)

    init_op = tf.global_variables_initializer()
    #  tf.get_default_graph().finalize()

    from tensorflow.core.protobuf import rewriter_config_pb2

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
        memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    graph_options = tf.GraphOptions(optimizer_options=optimizer_options,
                                    rewrite_options=rewrite_options)
    config = tf.ConfigProto(graph_options=graph_options)
    #sess = tf.Session(config=config)
    sess = tf.InteractiveSession(config=config)
    sess.run(Wf.initializer, feed_dict=init_dict)
    sess.run(X.initializer, feed_dict=init_dict)
    advance_batch()
    update_covariances()
    init_svds()
    sess.run(init_op, feed_dict=init_dict)  # initialize everything else

    print("Running training.")
    u.reset_time()

    step_lengths = []  # keep track of learning rates
    losses = []
    ratios = []  # actual loss decrease / expected decrease
    grad_norms = []
    pre_grad_norms = []  # preconditioned grad norm squared
    pre_grad_stable_norms = []  # sqrt preconditioned grad norms squared
    target_delta_list = []  # predicted decrease, linear approximation
    target_delta2_list = []  # predicted decrease, quadratic approximation
    actual_delta_list = []  # actual decrease

    # adaptive line search parameters
    alpha = 0.3  # acceptable fraction of predicted decrease
    beta = 0.8  # how much to shrink when violation
    growth_rate = 1.05  # how much to grow when too conservative

    def update_cov_A(i):
        sess.run(cov_A[i].initializer)

    def update_cov_B2(i):
        sess.run(cov_B2[i].initializer)

    # only update whitening matrix of input activations in the beginning
    if whitening_mode > 0:
        vars_svd_A[1].update()

    # compute t(delta).H.delta/2
    def hessian_quadratic(delta):
        #    update_covariances()
        W = u.unflatten(delta, fs[1:])
        W.insert(0, None)
        total = 0
        for l in range(1, n + 1):
            decrement = tf.trace(t(W[l]) @ cov_B2[l] @ W[l] @ cov_A[l])
            total += decrement
        return (total / 2).eval()

    # compute t(delta).H^-1.delta/2
    def hessian_quadratic_inv(delta):
        #    update_covariances()
        W = u.unflatten(delta, fs[1:])
        W.insert(0, None)
        total = 0
        for l in range(1, n + 1):
            invB2 = u.pseudo_inverse2(vars_svd_B2[l])
            invA = u.pseudo_inverse2(vars_svd_A[l])
            decrement = tf.trace(t(W[l]) @ invB2 @ W[l] @ invA)
            total += decrement
        return (total / 2).eval()

    # do line search, dump values as csv
    def line_search(initial_value, direction, step, num_steps):
        saved_val = tf.Variable(Wf)
        sess.run(saved_val.initializer)
        pl = tf.placeholder(dtype, shape=(), name="linesearch_p")
        assign_op = Wf.assign(initial_value - direction * step * pl)
        vals = []
        for i in range(num_steps):
            sess.run(assign_op, feed_dict={pl: i})
            vals.append(loss.eval())
        sess.run(Wf.assign(saved_val))  # restore original value
        return vals

    for step in range(num_steps):
        update_covariances()
        if step % whiten_every_n_steps == 0:
            update_svds()

        sess.run(grad.initializer)
        sess.run(pre_grad.initializer)

        lr0, loss0 = sess.run([lr, loss])
        save_params_op.run()

        # regular inverse becomes unstable when grad norm exceeds 1
        stabilized_mode = grad_norm.eval() < 1

        if stabilized_mode and not use_tikhonov:
            update_params_stable_op.run()
        else:
            update_params_op.run()

        loss1 = loss.eval()
        advance_batch()

        # line search stuff
        target_slope = (-pre_grad_dot_grad.eval() if stabilized_mode else
                        -pre_grad_stable_dot_grad.eval())
        target_delta = lr0 * target_slope
        target_delta_list.append(target_delta)

        # second order prediction of target delta
        # TODO: the sign is wrong, debug this
        # https://www.wolframcloud.com/objects/8f287f2f-ceb7-42f7-a599-1c03fda18f28
        if local_quadratics:
            x0 = Wf_copy.eval()
            x_opt = x0 - pre_grad.eval()
            # computes t(x)@H^-1 @(x)/2
            y_opt = loss0 - hessian_quadratic_inv(grad)
            # computes t(x)@H @(x)/2
            y_expected = hessian_quadratic(Wf - x_opt) + y_opt
            target_delta2 = y_expected - loss0
            target_delta2_list.append(target_delta2)

        actual_delta = loss1 - loss0
        actual_slope = actual_delta / lr0
        slope_ratio = actual_slope / target_slope  # between 0 and 1.01
        actual_delta_list.append(actual_delta)

        if do_line_search:
            vals1 = line_search(Wf_copy, pre_grad, lr / 100, 40)
            vals2 = line_search(Wf_copy, grad, lr / 100, 40)
            u.dump(vals1, "line1-%d" % (i, ))
            u.dump(vals2, "line2-%d" % (i, ))

        losses.append(loss0)
        step_lengths.append(lr0)
        ratios.append(slope_ratio)
        grad_norms.append(grad_norm.eval())
        pre_grad_norms.append(pre_grad_norm.eval())
        pre_grad_stable_norms.append(pre_grad_stable_norm.eval())

        if step % report_frequency == 0:
            print(
                "Step %d loss %.2f, target decrease %.3f, actual decrease, %.3f ratio %.2f grad norm: %.2f pregrad norm: %.2f"
                % (step, loss0, target_delta, actual_delta, slope_ratio,
                   grad_norm.eval(), pre_grad_norm.eval()))

        if adaptive_step_frequency and adaptive_step and step > adaptive_step_burn_in:
            # shrink if wrong prediction, don't shrink if prediction is tiny
            if slope_ratio < alpha and abs(
                    target_delta) > 1e-6 and adaptive_step:
                print("%.2f %.2f %.2f" % (loss0, loss1, slope_ratio))
                print(
                    "Slope optimality %.2f, shrinking learning rate to %.2f" %
                    (
                        slope_ratio,
                        lr0 * beta,
                    ))
                sess.run(vard[lr].setter, feed_dict={vard[lr].p: lr0 * beta})

            # grow learning rate, slope_ratio .99 worked best for gradient
            elif step > 0 and step % 50 == 0 and slope_ratio > 0.90 and adaptive_step:
                print("%.2f %.2f %.2f" % (loss0, loss1, slope_ratio))
                print("Growing learning rate to %.2f" % (lr0 * growth_rate))
                sess.run(vard[lr].setter,
                         feed_dict={vard[lr].p: lr0 * growth_rate})

        u.record_time()

    # check against expected loss
    if 'Apple' in sys.version:
        pass
        #    u.dump(losses, "kfac_small_final_mac.csv")
        targets = np.loadtxt("data/kfac_small_final_mac.csv", delimiter=",")
    else:
        pass
        #    u.dump(losses, "kfac_small_final_linux.csv")
        targets = np.loadtxt("data/kfac_small_final_linux.csv", delimiter=",")

    u.check_equal(targets, losses[:len(targets)], rtol=1e-1)
    u.summarize_time()
    print("Test passed")
Example #19
0
def main(unused_argv):
    params = params_dict.ParamsDict(resnet_config.RESNET_CFG,
                                    resnet_config.RESNET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    params.validate()
    params.lock()

    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else '',
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)

    if params.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        save_checkpoints_steps = max(5000, params.iterations_per_loop)

    config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        log_step_count_steps=FLAGS.log_step_count_steps,
        keep_checkpoint_max=1000,
        session_config=tf.ConfigProto(
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True))),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=params.iterations_per_loop,
            num_shards=params.num_cores,
            per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
            .PER_HOST_V2))  # pylint: disable=line-too-long

    resnet_classifier = tf.contrib.tpu.TPUEstimator(
        use_tpu=params.use_tpu,
        model_fn=resnet_model_fn,
        config=config,
        params=params.as_dict(),
        train_batch_size=params.train_batch_size,
        eval_batch_size=params.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu)

    assert (params.precision == 'bfloat16' or params.precision
            == 'float32'), ('Invalid value for precision parameter; '
                            'must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', params.precision)
    use_bfloat16 = params.precision == 'bfloat16'

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=use_bfloat16,
                transpose_input=params.transpose_input,
                selection=selection)
            for (is_training,
                 selection) in [(True, select_train), (False, select_eval)]
        ]
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=params.transpose_input,
                cache=params.use_cache and is_training,
                image_size=params.image_size,
                num_parallel_calls=params.num_parallel_calls,
                include_background_label=(params.num_label_classes == 1001),
                use_bfloat16=use_bfloat16) for is_training in [True, False]
        ]

    steps_per_epoch = params.num_train_images // params.train_batch_size
    eval_steps = params.num_eval_images // params.eval_batch_size

    if FLAGS.mode == 'eval':

        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                eval_results = resnet_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= params.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(
            FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long
        steps_per_epoch = params.num_train_images // params.train_batch_size
        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', params.train_steps,
            params.train_steps / steps_per_epoch, current_step)

        start_timestamp = time.time(
        )  # This time will include compilation time

        if FLAGS.mode == 'train':
            hooks = []
            if params.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(5000, params.iterations_per_loop)))
            if FLAGS.profile_every_n_steps > 0:
                hooks.append(
                    tpu_profiler_hook.TPUProfilerHook(
                        save_steps=FLAGS.profile_every_n_steps,
                        output_dir=FLAGS.model_dir,
                        tpu=FLAGS.tpu))
            resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                    max_steps=params.train_steps,
                                    hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            while current_step < params.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      params.train_steps)
                resnet_classifier.train(input_fn=imagenet_train.input_fn,
                                        max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    next_checkpoint, int(time.time() - start_timestamp))

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                eval_results = resnet_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=params.num_eval_images // params.eval_batch_size)
                tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                                eval_results)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                params.train_steps, elapsed_time)

        if FLAGS.export_dir is not None:
            # The guide to serving an exported TensorFlow model is at:
            #    https://www.tensorflow.org/serving/serving_basic
            tf.logging.info('Starting to export model.')
            export_path = resnet_classifier.export_saved_model(
                export_dir_base=FLAGS.export_dir,
                serving_input_receiver_fn=imagenet_input.image_serving_input_fn
            )
            if FLAGS.add_warmup_requests:
                inference_warmup.write_warmup_requests(
                    export_path,
                    FLAGS.model_name,
                    params.image_size,
                    batch_sizes=FLAGS.inference_batch_sizes,
                    image_format='JPEG')
Example #20
0
  def _GetMemoryOptimizerSessionConfig(self):
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
    return config_pb2.ConfigProto(graph_options=graph_options)
Example #21
0
  def test_unifiedRNN_with_cond(self):
    # This test demonstrates the Grappler graph rewrite in the case where the
    # wrapped function returns a different number of internal states.
    rewrites = rewriter_config_pb2.RewriterConfig()
    rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
    customer_optimizer = rewrites.custom_optimizers.add()
    customer_optimizer.name = 'ExperimentalImplementationSelector'
    rewrites.min_graph_nodes = -1
    graph_options = config_pb2.GraphOptions(rewrite_options=rewrites)
    config = config_pb2.ConfigProto(graph_options=graph_options)

    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 1

    with ops.Graph().as_default(), session.Session(config=config) as sess:
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train)

      layer = UnifiedLSTM(rnn_state_size)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape), name='predict')

      zeros = array_ops.zeros([batch, output_shape])
      dummy_runtime = constant_op.constant(
          'unknown', dtype=dtypes.string, name='runtime')
      a = constant_op.constant(0)
      b = constant_op.constant(1)
      # Will always run the lstm layer.
      outputs, runtime = control_flow_ops.cond(
          gen_math_ops.less(a, b),
          lambda: layer(inputs),
          lambda: (zeros, dummy_runtime))
      loss = losses.softmax_cross_entropy(predict, outputs)
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      train_op = optimizer.minimize(loss)

      sess.run([variables.global_variables_initializer()])
      existing_loss = 0

      for _ in range(epoch):
        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
            inputs: x_train,
            predict: y_train
        })
        if test.is_gpu_available():
          self.assertEqual(runtime_value, b'cudnn')
        else:
          self.assertEqual(runtime_value, b'cpu')
        # Make sure the loss is updated for every epoch
        # (layer weights properly updated).
        self.assertNotEqual(existing_loss, loss_value)
        existing_loss = loss_value
Example #22
0
def main(unused_argv):
  params = params_dict.ParamsDict(
      mnasnet_config.MNASNET_CFG, mnasnet_config.MNASNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  additional_params = {
      'steps_per_epoch': params.num_train_images / params.train_batch_size,
      'quantized_training': FLAGS.quantized_training,
  }

  params = params_dict.override_params_dict(
      params, additional_params, is_strict=False)

  params.validate()
  params.lock()

  if FLAGS.tpu or params.use_tpu:
    tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  else:
    tpu_cluster_resolver = None

  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, params.iterations_per_loop)
  config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=params.iterations_per_loop,
          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig
          .PER_HOST_V2))  # pylint: disable=line-too-long

  # Validates Flags.
  if params.precision == 'bfloat16' and params.use_keras:
    raise ValueError(
        'Keras layers do not fully support bfloat16 activation training.'
        ' You have set precision as %s and use_keras as %s.' %
        (params.precision, params.use_keras))

  # Initializes model parameters.
  mnasnet_est = tf.estimator.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=build_model_fn,
      config=config,
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu,
      params=params.as_dict())

  if FLAGS.mode == 'export_only':
    export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
    return

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train, imagenet_eval = [imagenet_input.ImageNetBigtableInput(
        is_training=is_training,
        use_bfloat16=False,
        transpose_input=params.transpose_input,
        selection=selection) for (is_training, selection) in
                                     [(True, select_train),
                                      (False, select_eval)]]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params.transpose_input,
            cache=params.use_cache and is_training,
            image_size=params.input_image_size,
            num_parallel_calls=params.num_parallel_calls,
            use_bfloat16=(params.precision == 'bfloat16')) for is_training in [True, False]
    ]

  if FLAGS.mode == 'eval':
    eval_steps = params.num_eval_images // params.eval_batch_size
    # Run evaluation when there's a new checkpoint
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d', eval_results,
                        elapsed_time)
        mnas_utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'], ckpt)

        # Terminate eval job when final checkpoint is reached
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break

      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)

    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    try:
      current_step = tf.train.load_variable(FLAGS.model_dir,
                                            tf.GraphKeys.GLOBAL_STEP)
    except (TypeError, ValueError, tf.errors.NotFoundError):
      current_step = 0

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params.train_steps,
        params.train_steps / params.steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params.use_async_checkpointing:
        try:
          from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
        except ImportError as e:
          logging.exception(
              'Async checkpointing is not supported in TensorFlow 2.x')
          raise e

        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, params.iterations_per_loop)))
      mnasnet_est.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params.train_steps,
          hooks=hooks)

    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params.train_steps)
        mnasnet_est.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                        eval_results)
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        mnas_utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'],
                                ckpt)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params.train_steps, elapsed_time)
      if FLAGS.export_dir:
        export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
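The `export()` helper called in this example is defined elsewhere in mnasnet_main.py and is not included in the snippet. For orientation only, here is a minimal sketch of what a SavedModel export step for a TF 1.x Estimator/TPUEstimator can look like; the helper name, the serving-input construction, and the image-size plumbing are assumptions for illustration, not the original implementation (which also handles post-training quantization when `post_quantize` is set).

# Hypothetical sketch of a SavedModel export step for a TF 1.x Estimator or
# TPUEstimator. Names and shapes below are illustrative assumptions; the real
# mnasnet export() is not reproduced here.
import tensorflow as tf  # TF 1.x API, matching the snippet above


def export_sketch(est, export_dir, image_size):
  """Exports `est` as a SavedModel that takes a batch of float images."""

  def serving_input_fn():
    # Accept a batch of already-decoded, resized float images.
    images = tf.placeholder(
        tf.float32, shape=[None, image_size, image_size, 3], name='images')
    return tf.estimator.export.ServingInputReceiver(
        features=images, receiver_tensors={'images': images})

  tf.logging.info('Exporting SavedModel to %s', export_dir)
  est.export_saved_model(export_dir, serving_input_fn)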
Example #23
from absl.testing import parameterized

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import test_util
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.layers import recurrent as rnn_v1
from tensorflow.python.keras.layers import recurrent_v2 as rnn
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent

# Global grappler config used for the graph-mode tests below.
_rewrites = rewriter_config_pb2.RewriterConfig()
_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON
_rewrites.min_graph_nodes = -1
_graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
_config = config_pb2.ConfigProto(graph_options=_graph_options)


@keras_parameterized.run_all_keras_modes(config=_config)
class GRUV2Test(keras_parameterized.TestCase):
    @parameterized.named_parameters(
        ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, True),
        ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, True),
        ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, True),
        ('unroll', 'tanh', 'sigmoid', 0, True, True, True),
        ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, True),
        ('not_reset_after', 'tanh', 'sigmoid', 0, False, True, False))
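Example #23 is truncated here: the parameterized test method that follows the decorator above is not included in the snippet. As a rough, hypothetical illustration of how the module-level `_config` is meant to be used, the sketch below attaches that config to the Keras backend session and runs a `recurrent_v2` GRU, so grappler's `implementation_selector` pass is allowed to swap in the specialized kernel at graph-optimization time. This assumes a TF 1.x-style runtime where graph mode is the default; the model and data are made up.

# Hypothetical usage sketch (not part of the original test file): attach the
# grappler config above to the Keras backend session and run a GRU v2 layer.
import numpy as np
import tensorflow as tf

# Sessions created with _config allow implementation_selector to rewrite the
# defun-backed GRU into a faster variant when one is applicable.
tf.keras.backend.set_session(tf.Session(config=_config))

inputs = tf.keras.Input(shape=(10, 8))
outputs = rnn.GRU(4)(inputs)  # recurrent_v2 GRU from the imports above
model = tf.keras.Model(inputs, outputs)

x = np.random.rand(2, 10, 8).astype(np.float32)
print(model.predict(x).shape)  # (2, 4)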
Example #24
def PlaceGraph(metagraph,
               cluster=None,
               allotted_time=3600,
               hparams=None,
               verbose=False):
    """Place the provided metagraph.

    Args:
      metagraph: the metagraph to place.
      cluster: an optional set of hardware resources to optimize the placement
        for. If none is specified, we'll optimize the placement for the
        hardware available on the local machine.
      allotted_time: the maximum amount of time in seconds to spend optimizing
        the placement.
      hparams: hyperparameters used to fine tune the placer.
      verbose: prints debug information if True.

    Returns:
      The placed metagraph.
    """
    if cluster is None:
        cluster = gcluster.Cluster()

    # Optimize the metagraph to speed up the placement.
    rewriter_config = rewriter_config_pb2.RewriterConfig()
    rewriter_config.optimizers.append("pruning")
    rewriter_config.optimizers.append("constfold")
    rewriter_config.optimizers.append("arithmetic")
    rewriter_config.optimizers.append("dependency")
    rewriter_config.optimizers.append("pruning")
    optimized_graph = tf_optimizer.OptimizeGraph(rewriter_config,
                                                 metagraph,
                                                 verbose=verbose,
                                                 cluster=cluster)
    optimized_metagraph = meta_graph_pb2.MetaGraphDef()
    optimized_metagraph.CopyFrom(metagraph)
    optimized_metagraph.graph_def.CopyFrom(optimized_graph)

    item = gitem.Item(optimized_metagraph)

    # Fill in default hyperparameters before they are first used: the fallback
    # in the except branch below reads hparams.failing_signal.
    if hparams is None:
        hparams = hierarchical_controller.hierarchical_controller_hparams()
    # We run with a single child.
    hparams.num_children = 1

    # Measure the runtime achievable with the original placement.
    try:
        _, original_run_time, _ = cluster.MeasureCosts(item)
        if verbose:
            print("Runtime for original placement: " + str(original_run_time))
    except errors.OpError as e:
        if verbose:
            print("Original placement isn't feasible: " + str(e))
        original_run_time = hparams.failing_signal

    with tf_ops.Graph().as_default():
        # Place all the controller nodes on the CPU. We don't want them to
        # compete for accelerator memory with the model being optimized.
        with tf_ops.device("/device:CPU:0"):
            model = hierarchical_controller.HierarchicalController(
                hparams, item, cluster)
            ops = model.build_controller()
            session_creator = training.ChiefSessionCreator()
            with training.MonitoredSession(
                    session_creator=session_creator) as sess:
                start_time = time.time()
                current_time = start_time
                while current_time - start_time < allotted_time:
                    grouping_actions = model.generate_grouping(sess)
                    input_to_seq2seq = model.create_group_embeddings(
                        grouping_actions, verbose=verbose)
                    model.generate_placement(input_to_seq2seq, sess)
                    try:
                        run_time = model.eval_placement(sess, verbose=verbose)
                    except errors.OpError as e:
                        if verbose:
                            print("Failed to run graph:" + str(e))
                        run_time = hparams.failing_signal
                    updated = model.update_reward(sess,
                                                  run_time,
                                                  verbose=verbose)
                    if updated and run_time < original_run_time:
                        if verbose:
                            print("Found better placement, with runtime " +
                                  str(run_time))
                        model.export_placement(metagraph)

                    model.process_reward(sess)

                    current_time = time.time()

    return metagraph
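The PlaceGraph snippet above omits its imports; they come from internal `tensorflow.python.grappler` modules (e.g. `cluster`, `item`, `tf_optimizer`) plus `meta_graph_pb2`, `errors`, `time`, and the hierarchical controller. A rough, hypothetical usage sketch follows; the toy graph is illustrative only, and PlaceGraph is assumed to be importable from `tensorflow.python.grappler.graph_placer`.

# Hypothetical usage sketch (not from the original file): export a toy graph as
# a MetaGraphDef and ask the placer for a better device assignment.
import tensorflow as tf  # TF 1.x API
from tensorflow.python.grappler import cluster as gcluster
from tensorflow.python.grappler import graph_placer

g = tf.Graph()
with g.as_default():
  a = tf.random_uniform([1024, 1024])
  b = tf.random_uniform([1024, 1024])
  tf.matmul(a, b, name="product")

metagraph = tf.train.export_meta_graph(graph=g)
placed = graph_placer.PlaceGraph(metagraph,
                                 cluster=gcluster.Cluster(),
                                 allotted_time=600,
                                 verbose=True)
# PlaceGraph mutates and returns the same MetaGraphDef; its node device fields
# reflect the best placement found, if any improvement was found in time.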