Example 1
  def __init__(self, data_location, subset, input_height, input_width,
                batch_size, num_cores, resize_method='crop', mean_value=[0.0,0.0,0.0], label_adjust=False):
    """dataloader generator

    Args:
        data_location (str): tf recorder local path
        subset (str): training or validation part
        input_height (int): input image size
        input_width (int): input image size
        batch_size (int): dataloader batch size
        num_cores (int): parallel 
        resize_method (str, optional): data preprocession methods. Defaults to 'crop'.
        mean_value (list, optional): data mean value. Defaults to [0.0,0.0,0.0].
        label_adjust (bool, optional): adjust the label value. Defaults to False.
    """
    self.batch_size = batch_size
    self.subset = subset
    self.dataset = datasets.ImagenetData(data_location)
    self.total_image = self.dataset.num_examples_per_epoch(self.subset)
    self.preprocessor = self.dataset.get_image_preprocessor()(
        input_height,
        input_width,
        batch_size,
        num_cores,
        resize_method,
        mean_value)
    self.label_adjust = label_adjust
    self.n = self.total_image // self.batch_size  # number of full batches per epoch
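# Editor's sketch (an assumption; the enclosing class name is not shown in
# this snippet, so `Dataloader` is a placeholder): constructing and sizing
# the dataloader might look like this:
# loader = Dataloader('/path/to/tfrecords', 'validation',
#                     input_height=224, input_width=224,
#                     batch_size=32, num_cores=4,
#                     resize_method='crop',
#                     mean_value=[123.68, 116.78, 103.94],
#                     label_adjust=True)
# print(loader.n)  # number of full batches per epoch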
Example 2
    def run(self):
        """run benchmark with optimized graph"""

        print("Run inference")

        data_config = tf.compat.v1.ConfigProto()
        data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
        data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
        data_config.use_per_session_threads = 1

        infer_config = tf.compat.v1.ConfigProto()
        infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
        infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
        infer_config.use_per_session_threads = 1

        data_graph = tf.Graph()
        with data_graph.as_default():
            if (self.args.data_location):
                print("Inference with real data.")
                dataset = datasets.ImagenetData(self.args.data_location)
                preprocessor = dataset.get_image_preprocessor()(
                    INCEPTION_V3_IMAGE_SIZE,
                    INCEPTION_V3_IMAGE_SIZE,
                    self.args.batch_size,
                    num_cores=self.args.num_cores,
                    resize_method='bilinear')
                images, labels = preprocessor.minibatch(dataset,
                                                        subset='validation')
            else:
                print("Inference with dummy data.")
                input_shape = [
                    self.args.batch_size, INCEPTION_V3_IMAGE_SIZE,
                    INCEPTION_V3_IMAGE_SIZE, 3
                ]
                images = tf.random.uniform(input_shape,
                                           0.0,
                                           255.0,
                                           dtype=tf.float32,
                                           name='synthetic_images')

        infer_graph = tf.Graph()
        with infer_graph.as_default():
            graph_def = tf.compat.v1.GraphDef()
            with tf.compat.v1.gfile.FastGFile(self.args.input_graph,
                                              'rb') as input_file:
                input_graph_content = input_file.read()
                graph_def.ParseFromString(input_graph_content)

            output_graph = optimize_for_inference(
                graph_def, [INPUTS], [OUTPUTS],
                dtypes.float32.as_datatype_enum, False)
            tf.import_graph_def(output_graph, name='')

        # Define input and output tensors for the inference graph
        input_tensor = infer_graph.get_tensor_by_name('input:0')
        output_tensor = infer_graph.get_tensor_by_name('predict:0')

        data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config)
        infer_sess = tf.compat.v1.Session(graph=infer_graph,
                                          config=infer_config)

        num_processed_images = 0
        num_remaining_images = datasets.IMAGENET_NUM_VAL_IMAGES

        if (not self.args.accuracy_only):
            iteration = 0
            warm_up_iteration = self.args.warmup_steps
            total_run = self.args.steps
            total_time = 0

            while num_remaining_images >= self.args.batch_size and iteration < total_run:
                iteration += 1

                data_load_start = time.time()
                image_np = data_sess.run(images)
                data_load_time = time.time() - data_load_start

                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                infer_sess.run([output_tensor],
                               feed_dict={input_tensor: image_np})
                time_consume = time.time() - start_time

                # only add data loading time for real data, not for dummy data
                if self.args.data_location:
                    time_consume += data_load_time

                print('Iteration %d: %.6f sec' % (iteration, time_consume))
                if iteration > warm_up_iteration:
                    total_time += time_consume

            time_average = total_time / (iteration - warm_up_iteration)
            print('Average time: %.6f sec' % (time_average))

            print('Batch size = %d' % self.args.batch_size)
            if (self.args.batch_size == 1):
                print('Latency: %.3f ms' % (time_average * 1000))

            print('Throughput: %.3f images/sec' %
                  (self.args.batch_size / time_average))

        else:  # accuracy check
            total_accuracy1, total_accuracy5 = (0.0, 0.0)

            while num_remaining_images >= self.args.batch_size:
                # Read and preprocess the data
                np_images, np_labels = data_sess.run([images, labels])
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                # Compute inference on the preprocessed data
                predictions = infer_sess.run(output_tensor,
                                             {input_tensor: np_images})
                elapsed_time = time.time() - start_time

                with tf.Graph().as_default() as accu_graph:
                    accuracy1 = tf.reduce_sum(input_tensor=tf.cast(
                        tf.nn.in_top_k(predictions=tf.constant(predictions),
                                       targets=tf.constant(np_labels),
                                       k=1), tf.float32))

                    accuracy5 = tf.reduce_sum(input_tensor=tf.cast(
                        tf.nn.in_top_k(predictions=tf.constant(predictions),
                                       targets=tf.constant(np_labels),
                                       k=5), tf.float32))
                    with tf.compat.v1.Session() as accu_sess:
                        np_accuracy1, np_accuracy5 = accu_sess.run(
                            [accuracy1, accuracy5])

                    total_accuracy1 += np_accuracy1
                    total_accuracy5 += np_accuracy5

                print("Iteration time: %0.4f ms" % elapsed_time)
                print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                      % (num_processed_images, total_accuracy1 / num_processed_images,
                         total_accuracy5 / num_processed_images))
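# Editor's sketch (an assumption, not part of the original benchmark): the
# accuracy loop above rebuilds a small TF graph and opens a new Session for
# every batch just to count top-k hits. The same counts can be computed
# directly in NumPy, given `predictions` of shape [batch, num_classes] and
# integer class ids in `np_labels`:
import numpy as np

def topk_correct(predictions, labels, k):
    """Count how many labels fall inside the top-k predicted classes."""
    topk = np.argsort(predictions, axis=1)[:, -k:]
    return int(np.sum(topk == labels.reshape(-1, 1)))

# Inside the loop, the per-batch update would then be:
# total_accuracy1 += topk_correct(predictions, np_labels, 1)
# total_accuracy5 += topk_correct(predictions, np_labels, 5)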
Example 3
    def __init__(self):
        self.model = FLAGS.model
        self.model_conf = model_config.get_model_config(self.model)
        self.trace_filename = FLAGS.trace_file
        self.data_format = FLAGS.data_format
        self.num_batches = FLAGS.num_batches
        autotune_threshold = FLAGS.autotune_threshold if (
            FLAGS.autotune_threshold) else 1
        min_autotune_warmup = 5 * autotune_threshold * autotune_threshold
        self.num_warmup_batches = FLAGS.num_warmup_batches if (
            FLAGS.num_warmup_batches) else max(10, min_autotune_warmup)
        self.graph_file = FLAGS.graph_file
        self.resize_method = FLAGS.resize_method
        self.sync_queue_counter = 0
        self.num_gpus = FLAGS.num_gpus

        # Use the batch size from the command line if specified, otherwise use the
        # model's default batch size.  Scale the benchmark's batch size by the
        # number of GPUs.
        if FLAGS.batch_size > 0:
            self.model_conf.set_batch_size(FLAGS.batch_size)
        self.batch_size = self.model_conf.get_batch_size() * FLAGS.num_gpus

        # Use the learning rate from the command line if specified, otherwise use
        # the model's default learning rate, which must always be set.
        assert self.model_conf.get_learning_rate() > 0.0
        if FLAGS.learning_rate is not None:
            self.model_conf.set_learning_rate(FLAGS.learning_rate)

        self.job_name = FLAGS.job_name  # "" for local training
        self.ps_hosts = FLAGS.ps_hosts.split(',')
        self.worker_hosts = FLAGS.worker_hosts.split(',')
        self.dataset = None
        self.data_name = FLAGS.data_name
        if FLAGS.data_dir is not None:
            if self.data_name is None:
                if 'imagenet' in FLAGS.data_dir:
                    self.data_name = 'imagenet'
                elif 'flowers' in FLAGS.data_dir:
                    self.data_name = 'flowers'
                else:
                    raise ValueError('Could not identify name of dataset. '
                                     'Please specify with --data_name option.')
            if self.data_name == 'imagenet':
                self.dataset = datasets.ImagenetData(FLAGS.data_dir)
            elif self.data_name == 'flowers':
                self.dataset = datasets.FlowersData(FLAGS.data_dir)
            else:
                raise ValueError(
                    'Unknown dataset. Must be one of imagenet or flowers.')

        self.local_parameter_device_flag = FLAGS.local_parameter_device
        if self.job_name:
            self.task_index = FLAGS.task_index
            self.cluster = tf.train.ClusterSpec({
                'ps': self.ps_hosts,
                'worker': self.worker_hosts
            })
            self.server = None

            if not self.server:
                self.server = tf.train.Server(self.cluster,
                                              job_name=self.job_name,
                                              task_index=self.task_index,
                                              config=create_config_proto(),
                                              protocol=FLAGS.server_protocol)
            worker_prefix = '/job:worker/task:%s' % self.task_index
            self.param_server_device = tf.train.replica_device_setter(
                worker_device=worker_prefix + '/cpu:0', cluster=self.cluster)
            # The device on which the queues for managing synchronization
            # between servers should be stored.
            num_ps = len(self.ps_hosts)
            self.sync_queue_devices = [
                '/job:ps/task:%s/cpu:0' % i for i in range(num_ps)
            ]
        else:
            self.task_index = 0
            self.cluster = None
            self.server = None
            worker_prefix = ''
            self.param_server_device = '/%s:0' % FLAGS.local_parameter_device
            self.sync_queue_devices = [self.param_server_device]

        # Device to use for ops that need to always run on the local worker's CPU.
        self.cpu_device = '%s/cpu:0' % worker_prefix

        # Device to use for ops that need to always run on the local worker's
        # compute device, and never on a parameter server device.
        self.raw_devices = [
            '%s/%s:%i' % (worker_prefix, FLAGS.device, i)
            for i in range(FLAGS.num_gpus)
        ]

        if FLAGS.staged_vars and FLAGS.variable_update != 'parameter_server':
            raise ValueError('staged_vars for now is only supported with '
                             '--variable_update=parameter_server')

        if FLAGS.variable_update == 'parameter_server':
            if self.job_name:
                if not FLAGS.staged_vars:
                    self.variable_mgr = variable_mgr.VariableMgrDistributedFetchFromPS(
                        self)
                else:
                    self.variable_mgr = (
                        variable_mgr.VariableMgrDistributedFetchFromStagedPS(
                            self))
            else:
                if not FLAGS.staged_vars:
                    self.variable_mgr = variable_mgr.VariableMgrLocalFetchFromPS(
                        self)
                else:
                    self.variable_mgr = variable_mgr.VariableMgrLocalFetchFromStagedPS(
                        self)
        elif FLAGS.variable_update == 'replicated':
            if self.job_name:
                raise ValueError(
                    'Invalid --variable_update in distributed mode: %s' %
                    FLAGS.variable_update)
            self.variable_mgr = variable_mgr.VariableMgrLocalReplicated(
                self, FLAGS.use_nccl)
        elif FLAGS.variable_update == 'distributed_replicated':
            if not self.job_name:
                raise ValueError(
                    'Invalid --variable_update in local mode: %s' %
                    FLAGS.variable_update)
            self.variable_mgr = variable_mgr.VariableMgrDistributedReplicated(
                self)
        elif FLAGS.variable_update == 'independent':
            if self.job_name:
                raise ValueError(
                    'Invalid --variable_update in distributed mode: %s' %
                    FLAGS.variable_update)
            self.variable_mgr = variable_mgr.VariableMgrIndependent(self)
        else:
            raise ValueError('Invalid --variable_update: %s' %
                             FLAGS.variable_update)

        # Device to use for running on the local worker's compute device, but
        # with variables assigned to parameter server devices.
        self.devices = self.variable_mgr.get_devices()
        if self.job_name:
            self.global_step_device = self.param_server_device
        else:
            self.global_step_device = self.cpu_device
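# Editor's sketch (an assumption, not shown in this example): the FLAGS used
# above are typically declared with tf.app.flags elsewhere in the benchmark.
# A minimal illustration for a few of the flags referenced; the defaults
# here are placeholders, not the benchmark's real defaults:
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('model', 'resnet50', 'Name of the model to benchmark.')
flags.DEFINE_integer('batch_size', 0, 'Per-device batch size; 0 uses the model default.')
flags.DEFINE_integer('num_gpus', 1, 'Number of compute devices.')
flags.DEFINE_string('variable_update', 'parameter_server',
                    'One of: parameter_server, replicated, '
                    'distributed_replicated, independent.')
FLAGS = flags.FLAGS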
Example 4
    def run(self):
        """run benchmark with optimized graph"""

        print("Run inference")

        data_config = tf.ConfigProto()
        data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
        data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
        data_config.use_per_session_threads = 1

        infer_config = tf.ConfigProto()
        infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
        infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
        infer_config.use_per_session_threads = 1

        data_graph = tf.Graph()
        with data_graph.as_default():
            if (self.args.data_location):
                print("Inference with real data.")
                dataset = datasets.ImagenetData(self.args.data_location)
                preprocessor = dataset.get_image_preprocessor()(
                    RESNET_IMAGE_SIZE,
                    RESNET_IMAGE_SIZE,
                    self.args.batch_size,
                    intra_threads=self.args.num_intra_threads,
                    resize_method='crop')
                images, labels = preprocessor.minibatch(dataset,
                                                        subset='validation')
            else:
                print("Inference with dummy data.")
                input_shape = [
                    self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE,
                    3
                ]
                images = tf.random.uniform(input_shape,
                                           0.0,
                                           255.0,
                                           dtype=tf.float32,
                                           name='synthetic_images')

        infer_graph = tf.Graph()
        with infer_graph.as_default():
            # convert the frozen graph to an optimized graph
            graph_def = tf.GraphDef()
            with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
                input_graph_content = input_file.read()
                graph_def.ParseFromString(input_graph_content)

            output_graph = graph_transforms.TransformGraph(
                graph_def, [INPUTS], [OUTPUTS], [OPTIMIZATION])
            tf.import_graph_def(output_graph, name='')

        # Define input and output tensors for the inference graph
        input_tensor = infer_graph.get_tensor_by_name('input:0')
        #output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/SpatialSqueeze:0')
        output_tensor = infer_graph.get_tensor_by_name(
            'resnet_v1_101/predictions/Reshape_1:0')

        #tf.global_variables_initializer()
        data_sess = tf.Session(graph=data_graph, config=data_config)
        infer_sess = tf.Session(graph=infer_graph, config=infer_config)

        num_processed_images = 0
        num_remaining_images = IMAGENET_VALIDATION_IMAGES

        if (not self.args.accuracy_only):  # performance check
            iteration = 0
            warm_up_iteration = self.args.warmup_steps
            total_run = self.args.steps
            total_time = 0
            #options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            #run_metadata = tf.RunMetadata()

            while num_remaining_images >= self.args.batch_size and iteration < total_run:
                iteration += 1

                # Read and preprocess the data
                data_load_start = time.time()
                image_np = data_sess.run(images)
                data_load_time = time.time() - data_load_start

                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                infer_sess.run([output_tensor],
                               feed_dict={input_tensor: image_np})
                time_consume = time.time() - start_time

                # only add data loading time for real data, not for dummy data
                if self.args.data_location:
                    time_consume += data_load_time

                #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                #with gfile.Open('resnet101_fp32_int8_master', 'w') as trace_file:
                #    trace_file.write(trace.generate_chrome_trace_format(show_memory=False))

                print('Iteration %d: %.3f sec' % (iteration, time_consume))
                if iteration > warm_up_iteration:
                    total_time += time_consume

            time_average = total_time / (iteration - warm_up_iteration)
            print('Average time: %.3f sec' % (time_average))

            print('Batch size = %d' % self.args.batch_size)
            if (self.args.batch_size == 1):
                print('Latency: %.3f ms' % (time_average * 1000))
            # print throughput for both batch size 1 and 128
            print('Throughput: %.3f images/sec' %
                  (self.args.batch_size / time_average))

        else:  # accuracy check
            total_accuracy1, total_accuracy5 = (0.0, 0.0)

            while num_remaining_images >= self.args.batch_size:
                # Read and preprocess the data
                np_images, np_labels = data_sess.run([images, labels])
                np_labels -= 1
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                # Compute inference on the preprocessed data
                predictions = infer_sess.run(output_tensor,
                                             {input_tensor: np_images})
                elapsed_time = time.time() - start_time
                with tf.Graph().as_default() as accu_graph:
                    # Putting all of this code within one graph context makes things faster.
                    accuracy1 = tf.reduce_sum(
                        tf.cast(
                            tf.nn.in_top_k(tf.constant(predictions),
                                           tf.constant(np_labels), 1),
                            tf.float32))

                    accuracy5 = tf.reduce_sum(
                        tf.cast(
                            tf.nn.in_top_k(tf.constant(predictions),
                                           tf.constant(np_labels), 5),
                            tf.float32))
                    with tf.Session() as accu_sess:
                        np_accuracy1, np_accuracy5 = accu_sess.run(
                            [accuracy1, accuracy5])
                    total_accuracy1 += np_accuracy1
                    total_accuracy5 += np_accuracy5
                    print("Iteration time: %0.4f ms" % elapsed_time)
                    print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                        % (num_processed_images, total_accuracy1 / num_processed_images,
                           total_accuracy5 / num_processed_images))
Example 5
    data_config = tf.ConfigProto()
    data_config.intra_op_parallelism_threads = args.data_num_intra_threads
    data_config.inter_op_parallelism_threads = args.data_num_inter_threads
    data_config.use_per_session_threads = 1

    infer_config = tf.ConfigProto()
    infer_config.intra_op_parallelism_threads = num_intra_threads
    infer_config.inter_op_parallelism_threads = num_inter_threads
    infer_config.use_per_session_threads = 1

    data_graph = tf.Graph()
    with data_graph.as_default():
        if args.data_location:
            print("inference with real data")
            # get the images from dataset
            dataset = datasets.ImagenetData(args.data_location)
            preprocessor = dataset.get_image_preprocessor(benchmark=True)(
                input_height,
                input_width,
                batch_size,
                num_cores=args.num_cores,
                resize_method='crop')
            images = preprocessor.minibatch(dataset, subset='validation')
        else:
            # synthetic images
            print("inference with dummy data")
            input_shape = [batch_size, input_height, input_width, 3]
            images = tf.random.uniform(input_shape,
                                       0.0,
                                       255.0,
                                       dtype=tf.float32,
                                       name='synthetic_images')
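    # Editor's sketch (an assumption; this example is truncated here): the two
    # configs above are typically paired with separate sessions, as in the
    # other examples on this page:
    # data_sess = tf.Session(graph=data_graph, config=data_config)
    # infer_sess = tf.Session(graph=infer_graph, config=infer_config)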
Example 6
        sys.exit("Please provide a graph file.")
    if args.input_height:
        input_height = args.input_height
    else:
        input_height = 224
    if args.input_width:
        input_width = args.input_width
    else:
        input_width = 224
    batch_size = args.batch_size
    input_layer = args.input_layer
    output_layer = args.output_layer
    num_inter_threads = args.num_inter_threads
    num_intra_threads = args.num_intra_threads
    data_location = args.data_location
    dataset = datasets.ImagenetData(data_location)
    preprocessor = dataset.get_image_preprocessor()(
        input_height,
        input_width,
        batch_size,
        1,  # device count
        tf.float32,  # data_type for input fed to the graph
        train=False,  # doing inference
        resize_method='bilinear')

    images, labels = preprocessor.minibatch(dataset,
                                            subset='train',
                                            use_datasets=True,
                                            cache_data=False)
    graph = load_graph(model_file)
    input_tensor = graph.get_tensor_by_name(input_layer + ":0")
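# Editor's sketch: `load_graph` is called above but not defined in this
# snippet. A typical TF1-style helper for loading a frozen .pb file (an
# assumption, not the original implementation) looks like:
import tensorflow as tf

def load_graph(model_file):
    """Load a frozen GraphDef from `model_file` into a new tf.Graph."""
    graph = tf.Graph()
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(model_file, 'rb') as f:
        graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph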
Example 7
  def run(self):
    """run benchmark with optimized graph"""

    print("Run inference")

    data_config = tf.ConfigProto()
    data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
    data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
    data_config.use_per_session_threads = 1

    infer_config = tf.ConfigProto()
    infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
    infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
    infer_config.use_per_session_threads = 1

    data_graph = tf.Graph()
    with data_graph.as_default():
      if (self.args.data_location):
        print("Inference with real data.")
        if self.args.calibrate:
          subset = 'calibration'
        else:
          subset = 'validation'
        dataset = datasets.ImagenetData(self.args.data_location)
        preprocessor = dataset.get_image_preprocessor()(
            RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size,
            num_cores=self.args.num_cores,
            resize_method='crop')

        images, labels, filenames = preprocessor.minibatch(dataset, subset=subset)

        # If a results file path is provided, then start the prediction output file
        if self.args.results_file_path:
          with open(self.args.results_file_path, "w+") as fp:
            fp.write("filename,actual,prediction\n")
      else:
        print("Inference with dummy data.")
        input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3]
        images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images')

    infer_graph = tf.Graph()
    with infer_graph.as_default():
      graph_def = tf.GraphDef()
      with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
        input_graph_content = input_file.read()
        graph_def.ParseFromString(input_graph_content)

      output_graph = graph_transforms.TransformGraph(graph_def,
                                         [INPUTS], [OUTPUTS], [OPTIMIZATION])
      tf.import_graph_def(output_graph, name='')

    # Define input and output tensors for the inference graph
    input_tensor = infer_graph.get_tensor_by_name('input:0')
    output_tensor = infer_graph.get_tensor_by_name('predict:0')

    data_sess = tf.Session(graph=data_graph, config=data_config)
    infer_sess = tf.Session(graph=infer_graph, config=infer_config)

    num_processed_images = 0
    num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \
        if self.args.data_location else (self.args.batch_size * self.args.steps)

    if (not self.args.accuracy_only):
      iteration = 0
      warm_up_iteration = self.args.warmup_steps
      total_run = self.args.steps
      total_time = 0

      while num_remaining_images >= self.args.batch_size and iteration < total_run:
        iteration += 1
        tf_filenames = None
        np_labels = None
        data_load_start = time.time()
        if self.args.results_file_path:
          image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames])
        else:
          image_np = data_sess.run(images)

        data_load_time = time.time() - data_load_start

        num_processed_images += self.args.batch_size
        num_remaining_images -= self.args.batch_size

        start_time = time.time()
        predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np})
        time_consume = time.time() - start_time

        # Write out the file name, expected label, and top prediction
        self.write_results_output(predictions, tf_filenames, np_labels)

        # only add data loading time for real data, not for dummy data
        if self.args.data_location:
          time_consume += data_load_time

        print('Iteration %d: %.6f sec' % (iteration, time_consume))
        if iteration > warm_up_iteration:
          total_time += time_consume

      time_average = total_time / (iteration - warm_up_iteration)
      print('Average time: %.6f sec' % (time_average))

      print('Batch size = %d' % self.args.batch_size)
      if (self.args.batch_size == 1):
        print('Latency: %.3f ms' % (time_average * 1000))
      # print throughput for both batch size 1 and 128
      print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average))

    else: # accuracy check
      total_accuracy1, total_accuracy5 = (0.0, 0.0)

      while num_remaining_images >= self.args.batch_size:
        # Read and preprocess the data
        tf_filenames = None
        if self.args.results_file_path:
          np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames])
        else:
          np_images, np_labels = data_sess.run([images, labels])
        num_processed_images += self.args.batch_size
        num_remaining_images -= self.args.batch_size

        start_time = time.time()
        # Compute inference on the preprocessed data
        predictions = infer_sess.run(output_tensor,
                                     {input_tensor: np_images})
        elapsed_time = time.time() - start_time

        # Write out the file name, expected label, and top prediction
        self.write_results_output(predictions, tf_filenames, np_labels)

        with tf.Graph().as_default() as accu_graph:
          accuracy1 = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                   tf.constant(np_labels), 1), tf.float32))

          accuracy5 = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                   tf.constant(np_labels), 5), tf.float32))
          with tf.Session() as accu_sess:
            np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5])

          total_accuracy1 += np_accuracy1
          total_accuracy5 += np_accuracy5

        print("Iteration time: %0.4f ms" % elapsed_time)
        print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                  % (num_processed_images, total_accuracy1 / num_processed_images,
                     total_accuracy5 / num_processed_images))
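  # Editor's sketch: `write_results_output` is called above but not included
  # in this snippet. A plausible implementation (an assumption, mirroring the
  # inline CSV-writing code in the last example on this page; assumes numpy
  # is imported as np) would be:
  def write_results_output(self, predictions, filenames, labels):
    """Append filename, actual label, and top-1 prediction rows to the results CSV."""
    if self.args.results_file_path and filenames is not None:
      top_predictions = np.argmax(predictions, 1)
      with open(self.args.results_file_path, "a") as fp:
        for filename, label, top in zip(filenames, labels, top_predictions):
          fp.write("{},{},{}\n".format(filename, label, top))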
Example 8
  def run(self):
    """run benchmark with optimized graph"""

    with tf.Graph().as_default() as graph:

      config = tf.ConfigProto()
      config.allow_soft_placement = True
      config.intra_op_parallelism_threads = self.args.num_intra_threads
      config.inter_op_parallelism_threads = self.args.num_inter_threads

      with tf.Session(config=config) as sess:

        # convert the frozen graph to an optimized graph
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
          input_graph_content = input_file.read()
          graph_def.ParseFromString(input_graph_content)

        output_graph = graph_transforms.TransformGraph(graph_def,
                                                       [INPUTS], [OUTPUTS], [OPTIMIZATION])
        sess.graph.as_default()
        tf.import_graph_def(output_graph, name='')

        # Define input and output tensors for the graph
        input_tensor = graph.get_tensor_by_name('input:0')
        output_tensor = graph.get_tensor_by_name('predict:0')
        tf.global_variables_initializer()

        num_processed_images = 0
        num_remaining_images = IMAGENET_VALIDATION_IMAGES

        if (self.args.data_location):
          print("Inference with real data.")
          dataset = datasets.ImagenetData(self.args.data_location)
          preprocessor = preprocessing.ImagePreprocessor(
            RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size,
            1,  # device count
            tf.float32,  # data_type for input fed to the graph
            train=False,  # doing inference
            resize_method='crop')
          images, labels, filenames = preprocessor.minibatch(dataset, subset='validation')
          num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \
                                 - num_processed_images
        else:
          print("Inference with dummy data.")
          input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3]
          images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images')

        if (not self.args.accuracy_only):  # performance check
          iteration = 0
          warm_up_iteration = 10
          total_run = 40
          total_time = 0

          while num_remaining_images >= self.args.batch_size and iteration < total_run:
            iteration += 1

            # Read and preprocess the data
            if (self.args.data_location):
              preprocessed_images = sess.run([images[0]])
              image_np = preprocessed_images[0]
            else:
              image_np = sess.run(images)

            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

            start_time = time.time()
            predicts = sess.run([output_tensor], feed_dict={input_tensor: image_np})
            time_consume = time.time() - start_time

            print('Iteration %d: %.3f sec' % (iteration, time_consume))
            if iteration > warm_up_iteration:
              total_time += time_consume

          time_average = total_time / (iteration - warm_up_iteration)
          print('Average time: %.3f sec' % (time_average))

          print('Batch size = %d' % self.args.batch_size)
          if (self.args.batch_size == 1):
            print('Latency: %.3f ms' % (time_average * 1000))
          # print throughput for both batch size 1 and 128
          print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average))

        else:  # accuracy check
          total_accuracy1, total_accuracy5 = (0.0, 0.0)

          # If a results file path is provided, then start the prediction output file
          if self.args.results_file_path:
            with open(self.args.results_file_path, "w+") as fp:
              fp.write("filename,actual,prediction\n")

          while num_remaining_images >= self.args.batch_size:
            # Read and preprocess the data
            np_images, np_labels, tf_filenames = sess.run(
                [images[0], labels[0], filenames[0]])
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

            # Compute inference on the preprocessed data
            predictions = sess.run(output_tensor,
                                   {input_tensor: np_images})

            # Write out the file name, expected label, and top prediction
            if self.args.results_file_path:
              top_predictions = np.argmax(predictions, 1)
              with open(self.args.results_file_path, "a") as fp:
                for filename, expected_label, top_prediction in \
                        zip(tf_filenames, np_labels, top_predictions):
                  fp.write("{},{},{}\n".format(filename, expected_label, top_prediction))

            accuracy1 = tf.reduce_sum(
              tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                     tf.constant(np_labels), 1), tf.float32))

            accuracy5 = tf.reduce_sum(
              tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                     tf.constant(np_labels), 5), tf.float32))
            np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5])
            total_accuracy1 += np_accuracy1
            total_accuracy5 += np_accuracy5
            print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                  % (num_processed_images, total_accuracy1 / num_processed_images,
                     total_accuracy5 / num_processed_images))
Example 9
    def eval_inference(self, infer_graph):
        """run benchmark with optimized graph"""

        print("Run inference")

        data_config = tf.compat.v1.ConfigProto()
        data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
        data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
        data_config.use_per_session_threads = 1

        infer_config = tf.compat.v1.ConfigProto()
        if self.args.env == 'mkl':
            print("Set inter and intra for mkl")
            infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
            infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
        infer_config.use_per_session_threads = 1

        data_graph = tf.Graph()
        with data_graph.as_default():
            if (self.args.data_location):
                print("Inference with real data.")
                dataset = datasets.ImagenetData(self.args.data_location)
                preprocessor = dataset.get_image_preprocessor()(
                    self.args.image_size,
                    self.args.image_size,
                    self.args.batch_size,
                    num_cores=self.args.num_cores,
                    resize_method=self.args.resize_method,
                    mean_value=[
                        self.args.r_mean, self.args.g_mean, self.args.b_mean
                    ],
                    scale=self.args.scale)
                images, labels = preprocessor.minibatch(dataset,
                                                        subset='validation')
            else:
                print("Inference with dummy data.")
                input_shape = [
                    self.args.batch_size, self.args.image_size,
                    self.args.image_size, 3
                ]
                images = tf.random.uniform(input_shape,
                                           0.0,
                                           255.0,
                                           dtype=tf.float32,
                                           name='synthetic_images')

        # Define input and output tensors for the inference graph
        input_tensor = infer_graph.get_tensor_by_name(self.args.input + ':0')
        output_tensor = infer_graph.get_tensor_by_name(self.args.output + ':0')

        data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config)
        infer_sess = tf.compat.v1.Session(graph=infer_graph,
                                          config=infer_config)

        num_processed_images = 0
        num_remaining_images = datasets.IMAGENET_NUM_VAL_IMAGES

        if (not self.args.accuracy_only):
            iteration = 0
            warm_up_iteration = self.args.warmup_steps
            total_run = self.args.steps
            total_time = 0

            while num_remaining_images >= self.args.batch_size and iteration < total_run:
                iteration += 1

                data_load_start = time.time()
                image_np = data_sess.run(images)
                data_load_time = time.time() - data_load_start

                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                infer_sess.run([output_tensor],
                               feed_dict={input_tensor: image_np})
                time_consume = time.time() - start_time

                # # only add data loading time for real data, not for dummy data
                # if self.args.data_location:
                #   time_consume += data_load_time

                print('Iteration %d: %.6f sec' % (iteration, time_consume))
                if iteration > warm_up_iteration:
                    total_time += time_consume

            time_average = total_time / (iteration - warm_up_iteration)
            print('Average time: %.6f sec' % (time_average))

            print('Batch size = %d' % self.args.batch_size)
            if (self.args.batch_size == 1):
                print('Latency: %.3f ms' % (time_average * 1000))

            print('Throughput: %.3f images/sec' %
                  (self.args.batch_size / time_average))

        else:  # accuracy check
            total_accuracy1, total_accuracy5 = (0.0, 0.0)

            while num_remaining_images >= self.args.batch_size:
                # Read and preprocess the data
                np_images, np_labels = data_sess.run([images, labels])
                if self.args.label_adjust:
                    np_labels -= 1
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                # Compute inference on the preprocessed data
                predictions = infer_sess.run(output_tensor,
                                             {input_tensor: np_images})
                elapsed_time = time.time() - start_time

                # Reshape the DenseNet output from 4-D to 2-D for the accuracy test
                if len(predictions.shape) > 2:
                    predictions = predictions.reshape(predictions.shape[0], -1)

                with tf.Graph().as_default() as accu_graph:
                    accuracy1 = tf.reduce_sum(input_tensor=tf.cast(
                        tf.nn.in_top_k(predictions=tf.constant(predictions),
                                       targets=tf.constant(np_labels),
                                       k=1), tf.float32))

                    accuracy5 = tf.reduce_sum(input_tensor=tf.cast(
                        tf.nn.in_top_k(predictions=tf.constant(predictions),
                                       targets=tf.constant(np_labels),
                                       k=5), tf.float32))
                    with tf.compat.v1.Session() as accu_sess:
                        np_accuracy1, np_accuracy5 = accu_sess.run(
                            [accuracy1, accuracy5])

                    total_accuracy1 += np_accuracy1
                    total_accuracy5 += np_accuracy5

                print("Iteration time: %0.4f ms" % elapsed_time)
                print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                      % (num_processed_images, total_accuracy1 / num_processed_images,
                         total_accuracy5 / num_processed_images))

            print("Accuracy: %.5f" % (total_accuracy1 / num_processed_images))