Example #1
  def infer(self, image_path):
    if self.config is None:
      tf.logging.error('Configuration is None')
      return None

    model_name = self.config['model_name']
    checkpoint_path = self.config['checkpoint_path']

    labels_to_names = None
    if dataset_utils.has_labels(checkpoint_path, 'label_map.txt'):
      labels_to_names = dataset_utils.read_label_file(checkpoint_path, 'label_map.txt')
    else:
      tf.logging.error('No label map found in %s' % checkpoint_path)
      return None

    keys = list(labels_to_names.keys())
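    # Build the inference graph in an isolated tf.Graph.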
    with tf.Graph().as_default():
      image_string = tf.read_file(image_path)
      image = tf.image.decode_jpeg(image_string, channels=3)

      image_preprocessing_fn = preprocessing_factory.get_preprocessing(
                               model_name, is_training=False)
      network_fn = nets_factory.get_network_fn(
                   model_name,
                   num_classes=len(keys),
                   is_training=False)
      
      processed_image = image_preprocessing_fn(
          image, network_fn.default_image_size, network_fn.default_image_size)
      image_expanded = tf.expand_dims(processed_image, axis=0)

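      # Forward pass: softmax over the logits gives per-class probabilities;
      # argmax gives the top-1 predicted class.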
      logits, _ = network_fn(image_expanded)
      probabilities = tf.nn.softmax(logits)
      predictions = tf.argmax(logits, 1)
      model_path = tf.train.latest_checkpoint(checkpoint_path)
      init_fn = slim.assign_from_checkpoint_fn(
          model_path, slim.get_model_variables(scope_map[model_name]))

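      # Restore the checkpoint weights, then evaluate the output tensors.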
      with tf.Session() as sess:
        init_fn(sess)
        probs, _ = sess.run([probabilities, predictions])
        result = []
        for i in range(len(probs[0])):
          result.append({'type': labels_to_names[keys[i]], 'prob': str(probs[0][i])})
        sorted_result = sorted(result, key=lambda k: float(k['prob']), reverse=True)
        return sorted_result
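
A minimal standalone sketch of the post-processing at the end of infer: softmax over the logits, then sort descending by probability. Plain NumPy stands in for the TensorFlow ops, and the label map here is a placeholder:

import numpy as np

def softmax(logits):
  # Numerically stable softmax over the last axis.
  shifted = logits - np.max(logits, axis=-1, keepdims=True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=-1, keepdims=True)

logits = np.array([[2.0, 0.5, -1.0]])               # one image, three classes
labels_to_names = {0: 'cat', 1: 'dog', 2: 'bird'}   # placeholder label map
probs = softmax(logits)[0]
result = [{'type': labels_to_names[i], 'prob': str(p)} for i, p in enumerate(probs)]
result.sort(key=lambda k: float(k['prob']), reverse=True)
print(result)  # highest-probability class first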
Example #2
  def eval(self):
    # config = self.config
    # if config is None:
    #   tf.logging.error('There is no input configurations.')
    #   return

    # with open(config['eval_configs']) as f:
    #   eval_configs = json.load(f)
    # eval_configs['batch_size'] = int(config['batch_size'])
    # eval_configs['model_name'] = config['model_name']
    # eval_configs['dataset_dir'] = config['data_dir']
    # eval_configs['checkpoint_path'] = config['checkpoint_path']
    # eval_configs['eval_dir'] = config['eval_dir']
    #self.create_tf_data(ratio=0)

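    # The config-driven setup above is commented out; hard-coded values are used instead.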
    eval_configs = {
        'batch_size': 100,
        'model_name': 'inception_v1',
        'dataset_dir': '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/data/classification/',
        'checkpoint_path': '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/model/classification/inception_v1/',
        'val_dir': '/home/wujia/examples/platform/test-platform/CVTron-Serve/cvtron-serve/static/log/',
        'num_preprocessing_threads': 4,
        'labels_offset': 0,
        'moving_average_decay': None,
        'max_num_batches': None,
        'master': '',
        'preprocessing_name': '',
        'eval_image_size': None,
    }
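    # Pin evaluation to a single GPU (PCI device index 1).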
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    self.num_classes = 5
    self.splits_to_sizes = {'train': 3320, 'val': 350}
    self.items_to_descriptions = {'image': 'A color image of varying size.',
                                  'label': 'A single integer between 0 and 4'} 

    if not eval_configs['dataset_dir']:
      raise ValueError('You must supply the dataset directory (dataset_dir).')

    with tf.Graph().as_default():
      tf_global_step = slim.get_or_create_global_step()

      ######################
      # Select the dataset #
      ######################
      dataset = self.get_dataset('train', eval_configs['dataset_dir'])
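      # Note: this scores the 'train' split; pass 'val' to evaluate the held-out split.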

      ####################
      # Select the model #
      ####################
      network_fn = nets_factory.get_network_fn(
          eval_configs['model_name'],
          num_classes=(dataset.num_classes - eval_configs['labels_offset']),
          is_training=False)

      ##############################################################
      # Create a dataset provider that loads data from the dataset #
      ##############################################################
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          shuffle=False,
          common_queue_capacity=2 * eval_configs['batch_size'],
          common_queue_min=eval_configs['batch_size'])
      [image, label] = provider.get(['image', 'label'])
      label -= eval_configs['labels_offset']

      #####################################
      # Select the preprocessing function #
      #####################################
      preprocessing_name = eval_configs['preprocessing_name'] or eval_configs['model_name']
      image_preprocessing_fn = preprocessing_factory.get_preprocessing(
          preprocessing_name,
          is_training=False)

      eval_image_size = eval_configs['eval_image_size'] or network_fn.default_image_size

      image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=eval_configs['batch_size'],
          num_threads=eval_configs['num_preprocessing_threads'],
          capacity=5 * eval_configs['batch_size'])

      ####################
      # Define the model #
      ####################
      logits, _ = network_fn(images)

      if eval_configs['moving_average_decay']:
        variable_averages = tf.train.ExponentialMovingAverage(
            eval_configs['moving_average_decay'], tf_global_step)
        variables_to_restore = variable_averages.variables_to_restore(
            slim.get_model_variables())
        variables_to_restore[tf_global_step.op.name] = tf_global_step
      else:
        variables_to_restore = slim.get_variables_to_restore()

      predictions = tf.argmax(logits, 1)
      labels = tf.squeeze(labels)

      # Define the metrics:
      names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
          'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
          'Recall_5': slim.metrics.streaming_recall_at_k(
              logits, labels, 5),
      })
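      # With num_classes == 5, top-5 recall (Recall_5) is trivially 1.0 here.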

      # Print the summaries to screen.
      for name, value in names_to_values.items():
        summary_name = 'eval/%s' % name
        op = tf.summary.scalar(summary_name, value, collections=[])
        op = tf.Print(op, [value], summary_name)
        tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

      # TODO(sguada) use num_epochs=1
      if eval_configs['max_num_batches']:
        num_batches = eval_configs['max_num_batches']
      else:
        # This ensures that we make a single pass over all of the data.
        num_batches = math.ceil(dataset.num_samples / float(eval_configs['batch_size']))
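        # e.g. ceil(3320 / 100) = 34 batches for the hard-coded 'train' split and batch size.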

      if tf.gfile.IsDirectory(eval_configs['checkpoint_path']):
        checkpoint_path = tf.train.latest_checkpoint(eval_configs['checkpoint_path'])
      else:
        checkpoint_path = eval_configs['checkpoint_path']

      tf.logging.info('Evaluating %s' % checkpoint_path)

      slim.evaluation.evaluate_once(
          master=eval_configs['master'],
          checkpoint_path=checkpoint_path,
          logdir=eval_configs['val_dir'],
          num_evals=num_batches,
          eval_op=list(names_to_updates.values()),
          variables_to_restore=variables_to_restore)
Example #3
  def train(self):
    config = self.config
    if config is None:
      tf.logging.error('There is no input configuration.')
      return

    try:
      with open(config['training_configs']) as f:
        training_configs = json.load(f)
      training_configs['tf_configs']['train_dir'] = config['train_dir']
      training_configs['tf_configs']['log_every_n_steps'] = int(config['log_every_n_steps'])
      training_configs['optimization_params']['optimizer'] = config['optimizer']
      training_configs['learning_rate_params']['learning_rate'] = float(config['learning_rate'])
      training_configs['dataset_params']['batch_size'] = int(config['batch_size'])
      training_configs['dataset_params']['model_name'] = config['model_name']
      training_configs['dataset_params']['dataset_dir'] = config['data_dir']
      training_configs['fine_tuning_params']['checkpoint_path'] = config['fine_tuning_ckpt_path']
      if training_configs['fine_tuning_params']['checkpoint_path'] is not None:
        model_name = training_configs['dataset_params']['model_name']
        scope = scope_map[model_name]
        # Note: both fields are formatted from exclude_scopes_map, so the scopes
        # excluded from the checkpoint restore are also the only trainable ones.
        training_configs['fine_tuning_params']['checkpoint_exclude_scopes'] = \
            exclude_scopes_map[model_name].format(scope, scope)
        training_configs['fine_tuning_params']['trainable_scopes'] = \
            exclude_scopes_map[model_name].format(scope, scope)
      self.training_configs = training_configs

      with tf.Graph().as_default():
        # use only one gpu
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(1)
        # from tensorflow.python.client import device_lib 
        # local_device_protos = device_lib.list_local_devices()

        # create tf_record data
        # self.create_tf_data()
        self.num_classes = 5
        self.splits_to_sizes = {'train': 3320, 'val': 350}
        self.items_to_descriptions = {'image': 'A color image of varying size.',
                                      'label': 'A single integer between 0 and 4'}        

        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=training_configs['tf_configs']['num_clones'],
            clone_on_cpu=training_configs['tf_configs']['clone_on_cpu'],
            replica_id=training_configs['tf_configs']['task'],
            num_replicas=training_configs['tf_configs']['worker_replicas'],
            num_ps_tasks=training_configs['tf_configs']['num_ps_tasks'])

        # Create global_step
        with tf.device(deploy_config.variables_device()):
          global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = self.get_dataset('train', training_configs['dataset_params']['dataset_dir'])

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            training_configs['dataset_params']['model_name'],
            num_classes=(dataset.num_classes - training_configs['dataset_params']['label_offset']),
            weight_decay=training_configs['optimization_params']['weight_decay'],
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = (training_configs['dataset_params']['preprocessing_name']
                              or training_configs['dataset_params']['model_name'])
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
          provider = slim.dataset_data_provider.DatasetDataProvider(
              dataset,
              num_readers=training_configs['tf_configs']['num_readers'],
              common_queue_capacity=20 * training_configs['dataset_params']['batch_size'],
              common_queue_min=10 * training_configs['dataset_params']['batch_size'])
          [image, label] = provider.get(['image', 'label'])
          label -= training_configs['dataset_params']['label_offset']

          train_image_size = (training_configs['dataset_params']['train_image_size']
                              or network_fn.default_image_size)

          image = image_preprocessing_fn(image, train_image_size, train_image_size)

          images, labels = tf.train.batch(
              [image, label],
              batch_size=training_configs['dataset_params']['batch_size'],
              num_threads=training_configs['tf_configs']['num_preprocessing_threads'],
              capacity=5 * training_configs['dataset_params']['batch_size'])
          labels = slim.one_hot_encoding(
              labels, dataset.num_classes - training_configs['dataset_params']['label_offset'])
          batch_queue = slim.prefetch_queue.prefetch_queue(
              [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
          """Allows data parallelism by creating multiple clones of network_fn."""
          images, labels = batch_queue.dequeue()
          logits, end_points = network_fn(images)

          #############################
          # Specify the loss function #
          #############################
          if 'AuxLogits' in end_points:
            slim.losses.softmax_cross_entropy(
                end_points['AuxLogits'], labels,
                label_smoothing=training_configs['learning_rate_params']['label_smoothing'], weights=0.4,
                scope='aux_loss')
          slim.losses.softmax_cross_entropy(
              logits, labels, label_smoothing=training_configs['learning_rate_params']['label_smoothing'], weights=1.0)
          return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
          x = end_points[end_point]
          summaries.add(tf.summary.histogram('activations/' + end_point, x))
          summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                          tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
          summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
          summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if training_configs['learning_rate_params']['moving_average_decay']:
          moving_average_variables = slim.get_model_variables()
          variable_averages = tf.train.ExponentialMovingAverage(
              training_configs['learning_rate_params']['moving_average_decay'], global_step)
        else:
          moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
          learning_rate = self._configure_learning_rate(dataset.num_samples, global_step)
          optimizer = self._configure_optimizer(learning_rate)
          summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if training_configs['learning_rate_params']['sync_replicas']:
          # If sync_replicas is enabled, the averaging will be done in the chief
          # queue runner.
          optimizer = tf.train.SyncReplicasOptimizer(
              opt=optimizer,
              replicas_to_aggregate=training_configs['learning_rate_params']['replicas_to_aggregate'],
              total_num_replicas=training_configs['tf_configs']['worker_replicas'],
              variable_averages=variable_averages,
              variables_to_average=moving_average_variables)
        elif training_configs['learning_rate_params']['moving_average_decay']:
          # Update ops executed locally by trainer.
          update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = self._get_variables_to_train()

        # Compute and aggregate the losses and gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
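        # Returning total_loss under a control dependency on update_op makes every
        # run of train_tensor also apply the gradient, batch-norm and EMA updates.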
        with tf.control_dependencies([update_op]):
          train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        train_dir = training_configs['tf_configs']['train_dir']
        if not os.path.exists(train_dir):
          os.makedirs(train_dir)
        copy(os.path.join(training_configs['dataset_params']['dataset_dir'], 'label_map.txt'),
             training_configs['tf_configs']['train_dir'])
        weblog_dir = config['weblog_dir']
        if not os.path.exists(weblog_dir):
          os.makedirs(weblog_dir)

        logger = Logger('Training Monitor')      

        ###########################
        # Kicks off the training. #
        ###########################
        learning.train(
            train_tensor,
            logdir=train_dir,
            master=training_configs['tf_configs']['master'],
            is_chief=(training_configs['tf_configs']['task'] == 0),
            init_fn=self._get_init_fn(),
            summary_op=summary_op,
            log_every_n_steps=training_configs['tf_configs']['log_every_n_steps'],
            save_summaries_secs=training_configs['tf_configs']['save_summaries_secs'],
            save_interval_secs=training_configs['tf_configs']['save_interval_secs'],
            sync_optimizer=optimizer if training_configs['learning_rate_params']['sync_replicas'] else None,
            logger=logger,
            weblog_dir=weblog_dir)
    except Exception as e:
      tf.logging.error('Unexpected error: %s' % e)
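
For reference, a sketch of the config dict that train reads; every key below is consumed somewhere in the method, but the values are illustrative placeholders rather than values from the source:

config = {
  'training_configs': 'training_configs.json',  # JSON with tf_configs, dataset_params, etc.
  'train_dir': '/tmp/train',                    # placeholder paths
  'log_every_n_steps': '10',
  'optimizer': 'rmsprop',
  'learning_rate': '0.01',
  'batch_size': '32',
  'model_name': 'inception_v1',
  'data_dir': '/tmp/data/',
  'fine_tuning_ckpt_path': None,                # None skips the fine-tuning scope setup
  'weblog_dir': '/tmp/weblog',
}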