def init_object_detector_graph(self, input_h, input_w, init_weights):
        """
        Build the object detector graph and load the initial weights.

        Creates the input placeholders, the network output, the loss, a
        momentum optimizer driven by a piecewise-constant learning rate and a
        training op that applies clipped gradients. Afterwards the global and
        local variable initializers are run on ``self.sess`` and
        ``init_weights`` is passed to ``self.load_weights``.

        Parameters
        ----------
        input_h:
            Height dimension of the ``images`` placeholder.
        input_w:
            Width dimension of the ``images`` placeholder.
        init_weights:
            Initial weights, forwarded unchanged to ``self.load_weights``.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy

        # Boolean flag fed at run time to select training or validation.
        self.is_train = _tf.placeholder(_tf.bool)

        # Input images and ground-truth labels.
        image_shape = [self.batch_size, input_h, input_w, 3]
        label_shape = [
            self.batch_size,
            self.grid_shape[0],
            self.grid_shape[1],
            self.num_anchors,
            self.num_classes + 5,
        ]
        self.images = _tf.placeholder(_tf.float32, image_shape, name="images")
        self.labels = _tf.placeholder(_tf.float32, label_shape, name="labels")

        # Network output, step counter and loss.
        self.tf_model = self.tiny_yolo(inputs=self.images,
                                       output_size=self.output_size)
        self.global_step = _tf.Variable(0, trainable=False, name="global_step")
        self.loss = self.loss_layer(self.tf_model, self.labels)

        # Learning-rate schedule: the rate is divided by ten at one half and
        # again at three quarters of the configured number of iterations.
        self.base_lr = to_numpy(self.config["learning_rate"])
        self.num_iterations = int(to_numpy(self.config["num_iterations"]))
        total_iterations = self.num_iterations
        self.init_steps = [
            total_iterations // 2,
            3 * total_iterations // 4,
            total_iterations,
        ]
        self.lrs = [
            _np.float32(self.base_lr * 10**(-exponent))
            for exponent in range(len(self.init_steps))
        ]
        # The final step is not a boundary; it only contributes a rate above.
        self.steps_tf = self.init_steps[:-1]
        self.lr = _tf.train.piecewise_constant(self.global_step, self.steps_tf,
                                               self.lrs)
        # TODO: Evaluate method to update lr in set_learning_rate()

        self.opt = _tf.train.MomentumOptimizer(self.lr, momentum=0.9)

        self.clip_value = to_numpy(self.config.get("gradient_clipping"))

        # Clip every gradient before it is applied; variables pass through.
        clipped_gradients = []
        for gradient, variable in self.opt.compute_gradients(self.loss):
            clipped = self.ClipIfNotNone(gradient, self.clip_value)
            clipped_gradients.append((clipped, variable))
        self.train_op = self.opt.apply_gradients(clipped_gradients,
                                                 global_step=self.global_step)

        # Variables must be initialized before the weights are loaded.
        self.sess.run(_tf.global_variables_initializer())
        self.sess.run(_tf.local_variables_initializer())

        self.load_weights(init_weights)
    def __init__(self, input_h, input_w, batch_size, output_size, out_h, out_w, init_weights, config):
        """
        Create a dedicated graph and session and build the object detector
        graph inside it.

        Parameters
        ----------
        input_h, input_w:
            Input image dimensions, forwarded to
            ``init_object_detector_graph``.
        batch_size:
            Stored as ``self.batch_size``.
        output_size:
            Stored as ``self.output_size``.
        out_h, out_w:
            Stored together as ``self.grid_shape``.
        init_weights: Dictionary
            Initial weights; every value is converted to numpy in place.
        config: Dictionary
            Configuration; ``config['num_classes']`` is read here.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        to_numpy = _utils.convert_shared_float_array_to_numpy

        # Weights arrive as shared float arrays; replace each with numpy.
        for name in list(init_weights.keys()):
            init_weights[name] = to_numpy(init_weights[name])

        self.od_graph = _tf.Graph()
        self.config = config
        self.batch_size = batch_size
        self.grid_shape = [out_h, out_w]
        self.num_classes = int(to_numpy(config['num_classes']))

        # Fifteen anchors: five scales, each with a tall, a square and a wide
        # box, listed scale by scale.
        aspect_shapes = ((1.0, 2.0), (1.0, 1.0), (2.0, 1.0))
        self.anchors = [
            (first * scale, second * scale)
            for scale in (1.0, 2.0, 4.0, 8.0, 16.0)
            for first, second in aspect_shapes
        ]
        self.num_anchors = len(self.anchors)
        self.output_size = output_size

        # The session is bound to the private graph, and the graph is made
        # the default while its ops are being created.
        self.sess = _tf.Session(graph=self.od_graph)
        with self.od_graph.as_default():
            self.init_object_detector_graph(input_h, input_w, init_weights)
    def __init__(self, config, net_params):
        """
        Build the style transfer model on the (freshly reset) default graph
        and initialize its variables in a new session.

        Parameters
        ----------
        config: Dictionary
            Configuration; every value is converted to numpy in place and
            ``config['st_training']`` decides whether the training graph is
            defined.
        net_params: Dictionary
            Network parameters; every value is converted to numpy in place
            and then handed to ``define_tensorflow_variables``.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        # Start from an empty default graph for every new model.
        _tf.reset_default_graph()

        for key in net_params.keys():
            net_params[key] = _utils.convert_shared_float_array_to_numpy(net_params[key])

        for key in config.keys():
            config[key] = _utils.convert_shared_float_array_to_numpy(config[key])

        self._batch_size = 1
        self._finetune_all_params = True
        self._define_training_graph = bool(config['st_training'])
        self._tf_variables = define_tensorflow_variables(net_params)

        # TODO: take care of batch size
        self.tf_input = _tf.placeholder(dtype=_tf.float32, shape=[None, 256, 256, 3])
        self.tf_style = _tf.placeholder(dtype=_tf.float32, shape=[None, 256, 256, 3])
        # Sized from ``self._batch_size``, the attribute assigned above; this
        # constructor never assigns a public ``self.batch_size``.
        self.tf_index = _tf.placeholder(dtype=_tf.int64, shape=[self._batch_size])

        self.__define_graph()

        self.sess = _tf.Session()
        init = _tf.global_variables_initializer()
        self.sess.run(init)
Example #4
0
 def get_augmented_data(self, images, annotations):
     """
     Run the resize/augmentation ops for a batch of images.

     Parameters
     ----------
     images:
         Batch of shared float arrays; each is converted to numpy and fed
         to the matching entry of ``self.img_tf``.
     annotations:
         Per-image annotations, fed to ``self.ann_tf``. Ignored when
         ``self.resize_only`` is set, in which case zero-filled
         placeholders are fed instead.

     Returns
     -------
     (images, annotations): tuple
         A contiguous float32 array holding all processed images, and a
         list with one contiguous float32 annotation array per image.
     """
     to_numpy = _utils.convert_shared_float_array_to_numpy
     with tf.Session(graph=self.graph) as session:
         # Only evaluate as many per-image ops as there are images.
         batch_ops = self.resize_op_batch[0 : len(images)]

         feeds = {}
         for index, image in enumerate(images):
             feeds[self.img_tf[index]] = to_numpy(image)
             if not self.resize_only:
                 feeds[self.ann_tf[index]] = to_numpy(annotations[index])
             else:
                 # No real annotations are needed; feed zeros instead.
                 feeds[self.ann_tf[index]] = self.batch_size * [np.zeros(6)]

         outputs = session.run(batch_ops, feed_dict=feeds)

         # Each output is indexed as (image, annotation).
         out_annotations = [
             np.ascontiguousarray(output[1], dtype=np.float32)
             for output in outputs
         ]
         stacked = np.array([output[0] for output in outputs], dtype=np.float32)
         out_images = np.ascontiguousarray(stacked, dtype=np.float32)
         return (out_images, out_annotations)
Example #5
0
    def get_augmented_data(self, images, annotations, random_seed):
        """
        Run the augmentation ops for a batch, seeded deterministically.

        Parameters
        ----------
        images:
            Batch of shared float arrays, fed to ``self.img_tf``.
        annotations:
            Per-image annotations, fed to ``self.ann_tf``.
        random_seed:
            Seed for the numpy generator that produces the alpha values and
            the per-image seeds fed into the graph.

        Returns
        -------
        (images, annotations): tuple
            A contiguous float32 array holding all processed images, and a
            list with one contiguous float32 annotation array per image.
        """
        tf = _lazy_import_tensorflow()
        to_numpy = _utils.convert_shared_float_array_to_numpy
        with tf.Session(graph=self.graph) as session:
            # Only evaluate as many per-image ops as there are images.
            batch_ops = self.resize_op_batch[0:len(images)]

            # Images and annotations.
            feeds = {}
            for index in range(len(images)):
                feeds[self.img_tf[index]] = to_numpy(images[index])
                feeds[self.ann_tf[index]] = to_numpy(annotations[index])

            # Randomness is sampled here rather than by TensorFlow's built-in
            # random number generation, so that the seed is effectively reset
            # for each session (batch). The alpha values are drawn before the
            # per-image seeds.
            rng = np.random.RandomState(seed=random_seed)
            feeds[self.alpha_tf] = rng.rand(*self.alpha_tf.shape)
            feeds[self.random_seed_tf] = rng.randint(0, 2**32,
                                                     size=self.batch_size)

            outputs = session.run(batch_ops, feed_dict=feeds)

            # Each output is indexed as (image, annotation).
            out_annotations = [
                np.ascontiguousarray(output[1], dtype=np.float32)
                for output in outputs
            ]
            stacked = np.array([output[0] for output in outputs],
                               dtype=np.float32)
            out_images = np.ascontiguousarray(stacked, dtype=np.float32)
            return (out_images, out_annotations)
Example #6
0
    def __init__(self, input_h, input_w, batch_size, output_size, init_weights, config, is_train=True):
        """
        Build the object detector on the default graph, initialize its
        variables in a new session and load the initial weights.

        Parameters
        ----------
        input_h, input_w:
            Height and width dimensions of the ``images`` placeholder.
        batch_size:
            Leading dimension of both placeholders.
        output_size:
            Forwarded to ``self.tiny_yolo``.
        init_weights: Dictionary
            Initial weights; every value is converted to numpy in place.
        config: Dictionary
            Configuration; ``num_classes``, ``learning_rate``,
            ``num_iterations`` and ``gradient_clipping`` are read here.
        is_train:
            Stored as ``self.is_train``.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy

        # Every new model starts from an empty default graph.
        _tf.reset_default_graph()

        # Weights arrive as shared float arrays; replace each with numpy.
        for name in list(init_weights.keys()):
            init_weights[name] = to_numpy(init_weights[name])

        self.config = config
        self.batch_size = batch_size
        self.grid_shape = [13,13]
        self.num_classes = int(to_numpy(config['num_classes']))

        # Fifteen anchors: five scales, each with a tall, a square and a wide
        # box, listed scale by scale.
        aspect_shapes = ((1.0, 2.0), (1.0, 1.0), (2.0, 1.0))
        self.anchors = [
            (first * scale, second * scale)
            for scale in (1.0, 2.0, 4.0, 8.0, 16.0)
            for first, second in aspect_shapes
        ]
        self.num_anchors = len(self.anchors)
        self.output_size = output_size
        self.is_train = is_train  # Flag for training or validation

        # Input images and ground-truth labels.
        image_shape = [self.batch_size, input_h, input_w, 3]
        label_shape = [
            self.batch_size,
            self.grid_shape[0],
            self.grid_shape[1],
            self.num_anchors,
            self.num_classes + 5,
        ]
        self.images = _tf.placeholder(_tf.float32, image_shape, name='images')
        self.labels = _tf.placeholder(_tf.float32, label_shape, name='labels')

        # Stored before the network is built, matching the original order.
        self.init_weights = init_weights
        self.tf_model = self.tiny_yolo(inputs=self.images, output_size=self.output_size)
        self.global_step = _tf.Variable(0, trainable=False, name="global_step")
        self.loss = self.loss_layer(self.tf_model, self.labels)

        # Learning-rate schedule: the rate is divided by ten at one half and
        # again at three quarters of the configured number of iterations.
        self.base_lr = to_numpy(config['learning_rate'])
        self.num_iterations = int(to_numpy(config['num_iterations']))
        total_iterations = self.num_iterations
        self.init_steps = [
            total_iterations // 2,
            3 * total_iterations // 4,
            total_iterations,
        ]
        self.lrs = [
            _np.float32(self.base_lr * 10 ** (-exponent))
            for exponent in range(len(self.init_steps))
        ]
        self.steps_tf = self.init_steps[:-1]
        self.lr = _tf.train.piecewise_constant(self.global_step, self.steps_tf, self.lrs)
        # TODO: Evaluate method to update lr in set_learning_rate()

        self.opt = _tf.train.MomentumOptimizer(self.lr, momentum=0.9)

        self.clip_value = to_numpy(self.config.get('gradient_clipping'))

        # Clip every gradient before it is applied; variables pass through.
        clipped_gradients = []
        for gradient, variable in self.opt.compute_gradients(self.loss):
            clipped = self.ClipIfNotNone(gradient, self.clip_value)
            clipped_gradients.append((clipped, variable))
        self.train_op = self.opt.apply_gradients(clipped_gradients, global_step=self.global_step)

        # Variables must be initialized before the weights are loaded.
        self.sess = _tf.Session()
        self.sess.run(_tf.global_variables_initializer())
        self.sess.run(_tf.local_variables_initializer())

        self.load_weights(self.init_weights)
def apply_bounding_box_transformation(images, annotations, transformations, clip_to_shape=None):
    """
    Apply a per-image transformation matrix to bounding box annotations.

    For every image the annotation boxes are scaled to pixel corner
    coordinates, multiplied (as homogeneous points) by the transpose of the
    matching transformation, re-ordered so that the lower corner comes first,
    clipped, and finally normalized by ``clip_to_shape``.

    Parameters
    ----------
    images:
        Sequence of shared float arrays. Only ``shape[0]`` (height) and
        ``shape[1]`` (width) of each converted image are used.
    annotations:
        Sequence of shared float arrays, one per image. Each converts to a
        2-D array whose column 0 is an identifier, columns 1-4 are a box
        whose columns 1 and 3 are scaled by the image width and columns 2
        and 4 by the image height, and column 5 is a confidence.
    transformations:
        Sequence of matrices, one per image, applied to ``[a, b, 1]`` rows.
    clip_to_shape:
        Pair whose first entry bounds and normalizes columns 0 and 2 of the
        transformed boxes and whose second entry does the same for columns
        1 and 3. Although it defaults to ``None``, the final normalization
        divides by it, so a value must be supplied.

    Returns
    -------
    aug_anns: list
        One C-contiguous float32 array per annotation, laid out as
        identifier, four box values, confidence.
    """
    aug_anns = []
    for i, ann in enumerate(annotations):
        image = _utils.convert_shared_float_array_to_numpy(images[i])
        height = image.shape[0]
        # Width is the second axis; the earlier code read shape[0] here and
        # so mis-scaled the horizontal coordinates of non-square images.
        width = image.shape[1]
        annotation = _utils.convert_shared_float_array_to_numpy(ann)
        identifier = np.expand_dims(annotation[:, 0], axis=1)

        # Convert each box from (offset, extent) form to two corner points
        # in pixels.
        box = np.zeros(annotation[:, 1:5].shape)
        for j in range(len(annotation)):
            box[j][0] = annotation[j][2]*float(height)
            box[j][1] = annotation[j][1]*float(width)
            box[j][2] = (annotation[j][4]+annotation[j][2])*float(height)
            box[j][3] = (annotation[j][3]+annotation[j][1])*float(width)

        confidence = np.expand_dims(annotation[:, 5], axis=1)

        # Each [n, 4] box row becomes two points with a trailing one so that
        # they can be multiplied by the transformation matrix.
        v = np.concatenate([box.reshape(-1, 2), np.ones((box.shape[0]*2, 1), dtype=np.float32)], axis=1)
        v = np.dot(v, np.transpose(transformations[i]))
        # Back to one row of four values per box.
        bbox_out = v[:, :2].reshape(-1, 4)

        # Make points correctly ordered (lower < upper). A dedicated index
        # name keeps the outer image index untouched.
        for k in range(len(bbox_out)):
            if bbox_out[k][0] > bbox_out[k][2]:
                bbox_out[k][0], bbox_out[k][2] = bbox_out[k][2], bbox_out[k][0]
            if bbox_out[k][1] > bbox_out[k][3]:
                bbox_out[k][1], bbox_out[k][3] = bbox_out[k][3], bbox_out[k][1]

        # Clipping covers the whole array, so once after ordering is enough.
        if clip_to_shape is not None:
            bbox_out[:, 0::2] = np.clip(bbox_out[:, 0::2], 0, clip_to_shape[0])
            bbox_out[:, 1::2] = np.clip(bbox_out[:, 1::2], 0, clip_to_shape[1])

        # Back to normalized (offset, extent) form, swapping the column
        # pairs into the order used by the input annotations.
        bbox = np.zeros(bbox_out.shape)
        for k in range(len(bbox_out)):
            bbox[k][0] = bbox_out[k][1]/float(clip_to_shape[0])
            bbox[k][1] = bbox_out[k][0]/float(clip_to_shape[1])
            bbox[k][2] = (bbox_out[k][3] - bbox_out[k][1])/float(clip_to_shape[0])
            bbox[k][3] = (bbox_out[k][2] - bbox_out[k][0])/float(clip_to_shape[1])

        an = np.hstack((np.hstack((identifier, bbox)), confidence))
        an = np.ascontiguousarray(an, dtype=np.float32)
        aug_anns.append(an)
    return aug_anns
def get_resized_images(images, output_shape):
    """
    Build, for each image, the TensorFlow ops that resize it to
    ``output_shape``.

    Every image is converted to numpy, scaled with bilinear interpolation,
    padded through ``pad_to_ensure_size``, cropped to ``output_shape`` at a
    randomly drawn offset and clipped to the range [0, 1].

    Parameters
    ----------
    images:
        Sequence of shared float arrays.
    output_shape:
        Target size; index 0 is used as the height and index 1 as the width.

    Returns
    -------
    resized_images: list
        One tensor per input image.
    """
    target_h = output_shape[0]
    target_w = output_shape[1]

    resized_images = []
    for shared_image in images:
        source = _utils.convert_shared_float_array_to_numpy(shared_image)
        height, width, _ = tf.unstack(tf.shape(source))

        # Per-axis scale factors and the resulting integer size.
        scale_h = tf.constant(target_h, dtype=tf.float32) / tf.to_float(height)
        scale_w = tf.constant(target_w, dtype=tf.float32) / tf.to_float(width)
        new_height = tf.to_int32(tf.to_float(height) * scale_h)
        new_width = tf.to_int32(tf.to_float(width) * scale_w)

        # resize_bilinear is called on a batch, so add and then drop a
        # leading batch axis.
        batched = tf.expand_dims(source, 0)
        scaled = tf.squeeze(
            tf.image.resize_bilinear(batched, [new_height, new_width]), [0])

        padded, _ = pad_to_ensure_size(scaled, target_h, target_w,
                                       random=False)

        new_height = tf.maximum(target_h, new_height)
        new_width = tf.maximum(target_w, new_width)

        # Random top-left corner of the crop window.
        offset_y = tf.random_uniform(
            [], minval=0, maxval=new_height - target_h + 1, dtype=tf.int32)
        offset_x = tf.random_uniform(
            [], minval=0, maxval=new_width - target_w + 1, dtype=tf.int32)
        cropped = array_ops.slice(padded, [offset_y, offset_x, 0],
                                  [target_h, target_w, 3])

        resized_images.append(tf.clip_by_value(cropped, 0, 1))

    return resized_images
Example #9
0
    def train(self, feed_dict):
        """
        Run one optimization step on a batch with integer class labels.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input``, ``labels`` and ``num_samples``; every value is
            converted to numpy in place.

        Returns
        -------
        result: Dictionary
            ``loss`` and ``output`` arrays from the session run.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        sample_count = int(float(feed_dict["num_samples"]))

        # One-hot encode: one row per sample with a 1 in the label's column.
        targets = _np.zeros((sample_count, self.num_classes))
        class_ids = feed_dict["labels"].astype("int32").T
        targets[_np.arange(sample_count), class_ids] = 1

        fetches = [self.optimizer, self.cost, self.predictions]
        session_feed = {
            self.input: feed_dict['input'],
            self.one_hot_labels: targets,
        }
        _, batch_loss, batch_output = self.sess.run(fetches,
                                                    feed_dict=session_feed)

        return {
            'loss': _np.array(batch_loss),
            'output': _np.array(batch_output),
        }
Example #10
0
    def __init__(
        self,
        net_params,
        batch_size,
        num_features,
        num_classes,
        prediction_window,
        seq_len,
        seed,
    ):
        """
        Create a dedicated graph and session and build the activity
        classifier graph inside it.

        Parameters
        ----------
        net_params: Dictionary
            Network parameters; every value is converted to numpy in place.
        batch_size, num_classes, seq_len:
            Stored on the instance under the same names.
        num_features, prediction_window, seed:
            Forwarded to ``init_activity_classifier_graph``.
        """
        _utils.suppress_tensorflow_warnings()
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        # Parameters arrive as shared float arrays; replace each with numpy.
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(net_params.keys()):
            net_params[name] = to_numpy(net_params[name])

        self.ac_graph = _tf.Graph()
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.seq_len = seq_len

        # The session is bound to the private graph, which is made the
        # default while its ops are being created.
        self.sess = _tf.Session(graph=self.ac_graph)
        with self.ac_graph.as_default():
            self.init_activity_classifier_graph(
                net_params, num_features, prediction_window, seed
            )
    def predict(self, feed_dict):
        """
        Run the model on a batch, also returning the loss when labels are
        supplied.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and, optionally, ``labels`` together with
            ``weights``; every value is converted to numpy in place.

        Returns
        -------
        result: Dictionary
            ``output`` always, plus ``loss`` when ``labels`` was present.
        """
        has_labels = "labels" in feed_dict

        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        session_feed = {self.input: feed_dict["input"]}

        if not has_labels:
            # The fetch is a one-element list, so the resulting array keeps
            # a leading axis of length one.
            fetched = self.sess.run([self.predictions], feed_dict=session_feed)
            return {"output": _np.array(fetched)}

        session_feed[self.labels] = feed_dict["labels"]
        session_feed[self.weights] = feed_dict["weights"]
        probabilities, batch_loss = self.sess.run(
            [self.predictions, self.cost], feed_dict=session_feed)

        return {
            "loss": _np.array(batch_loss),
            "output": _np.array(probabilities),
        }
Example #12
0
    def predict(self, feed_dict):
        """
        Run the model on a batch, also returning the loss when labels are
        supplied.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and ``num_samples`` and, optionally, integer
            ``labels``; every value is converted to numpy in place.

        Returns
        -------
        result: Dictionary
            ``output`` always, plus ``loss`` when ``labels`` was present.
        """
        has_labels = "labels" in feed_dict

        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        sample_count = int(float(feed_dict["num_samples"]))
        targets = _np.zeros((sample_count, self.num_classes))

        session_feed = {self.input: feed_dict["input"]}

        if not has_labels:
            # The fetch is a one-element list, so the resulting array keeps
            # a leading axis of length one.
            fetched = self.sess.run([self.predictions], feed_dict=session_feed)
            return {'output': _np.array(fetched)}

        # One-hot encode: one row per sample with a 1 in the label's column.
        class_ids = feed_dict["labels"].astype("int32").T
        targets[_np.arange(sample_count), class_ids] = 1
        session_feed[self.one_hot_labels] = targets

        probabilities, batch_loss = self.sess.run(
            [self.predictions, self.cost], feed_dict=session_feed)

        return {
            'loss': _np.array(batch_loss),
            'output': _np.array(probabilities),
        }
Example #13
0
    def train(self, feed_dict):
        """
        Run one training step on a batch of inputs, labels and weights.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input``, ``labels`` and ``weights``. Every value is
            converted to numpy, has axis 1 squeezed out and is reshaped to
            three dimensions, in place.

        Returns
        -------
        result: Dictionary
            ``loss`` (loss per sequence) and ``output`` (probabilities with
            their last two axes flattened into one).
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            batch = _np.squeeze(to_numpy(feed_dict[name]), axis=1)
            target_shape = (batch.shape[0], batch.shape[1], batch.shape[2])
            feed_dict[name] = _np.reshape(batch, target_shape)

        session_feed = {
            self.data: feed_dict['input'],
            self.target: feed_dict['labels'],
            self.weight: feed_dict['weights'],
            self.is_training: True,
        }
        _, seq_loss, raw_probs = self.sess.run(
            [self.train_op, self.loss_per_seq, self.probs],
            feed_dict=session_feed)

        prob = _np.array(raw_probs)
        flattened = _np.reshape(prob, (prob.shape[0], prob.shape[1] * prob.shape[2]))
        return {'loss': _np.array(seq_loss), 'output': flattened}
    def predict(self, feed_dict):
        """
        Evaluate the network on a batch of input images.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds the batch under ``input``; every value is converted to
            numpy in place.

        Returns
        -------
        result: Dictionary
            The evaluated ``self.tf_model`` under ``output``.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        # Prediction always runs with the training flag switched off.
        session_feed = {
            self.images: feed_dict["input"],
            self.is_train: False,
        }
        fetched = self.sess.run([self.tf_model], feed_dict=session_feed)

        # TODO: Include self.labels: feed_dict['label'] to handle labels from validation set
        return {"output": _np.array(fetched[0])}
    def train(self, feed_dict):
        """
        Run one training step on a batch of images and labels.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and ``labels``; every value is converted to
            numpy in place, and ``labels`` is additionally replaced by its
            reshaped form.

        Returns
        -------
        result: Dictionary
            The batch loss under ``loss``, wrapped in a one-element array.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        # Labels are reshaped to the layout of the labels placeholder.
        label_shape = (
            self.batch_size,
            self.grid_shape[0],
            self.grid_shape[1],
            self.num_anchors,
            self.num_classes + 5,
        )
        feed_dict['labels'] = feed_dict['labels'].reshape(*label_shape)

        session_feed = {
            self.images: feed_dict['input'],
            self.labels: feed_dict['labels'],
        }
        _, loss_batch = self.sess.run([self.train_op, self.loss],
                                      feed_dict=session_feed)

        return {'loss': _np.array([loss_batch])}
    def predict(self, feed_dict):
        """
        Stylize a batch of images.

        A new input placeholder matching the incoming image size is created
        and the graph is defined again (without the training part) on every
        call before the session is run.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and ``index``; every value is converted to numpy
            in place.

        Returns
        -------
        result: Dictionary
            The stylized images, cropped to the input height and width,
            under ``output``.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        input_batch = feed_dict['input']

        # Leave the batch axis open and copy the remaining dimensions.
        placeholder_shape = [None] + list(input_batch.shape)[1:]
        self.tf_input = _tf.placeholder(dtype=_tf.float32,
                                        shape=placeholder_shape)
        self._define_training_graph = False
        self.__define_graph()

        session_feed = {
            self.tf_input: feed_dict['input'],
            self.tf_index: feed_dict['index'],
        }
        fetched = self.sess.run([self.output], feed_dict=session_feed)
        stylized_raw = _np.array(fetched)

        expected_height = input_batch.shape[1]
        expected_width = input_batch.shape[2]

        # Crop to remove added padding, then drop the axis introduced by
        # fetching a one-element list.
        cropped = stylized_raw[:, :, 0:expected_height, 0:expected_width, :]
        return {"output": _np.array(cropped[0])}
    def predict(self, feed_dict):
        """
        Run the model on validation data (inputs, labels and weights) or on
        test data (inputs only).

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and, for validation, ``labels`` and ``weights``.
            Every value is converted to numpy, has axis 1 squeezed out and
            is reshaped to three dimensions, in place.

        Returns
        -------
        result: Dictionary
            ``output`` (probabilities with their last two axes flattened)
            and, unless ``feed_dict`` has exactly one entry, ``loss`` (loss
            per sequence).
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            batch = _np.squeeze(to_numpy(feed_dict[name]), axis=1)
            target_shape = (batch.shape[0], batch.shape[1], batch.shape[2])
            feed_dict[name] = _np.reshape(batch, target_shape)

        # A single entry means only inputs were provided.
        inputs_only = len(feed_dict.keys()) == 1

        if inputs_only:
            session_feed = {
                self.data: feed_dict["input"],
                self.is_training: False,
            }
            raw_probs = self.sess.run(self.probs, feed_dict=session_feed)
        else:
            session_feed = {
                self.data: feed_dict["input"],
                self.target: feed_dict["labels"],
                self.weight: feed_dict["weights"],
                self.is_training: False,
            }
            seq_loss, raw_probs = self.sess.run(
                [self.loss_per_seq, self.probs], feed_dict=session_feed)

        prob = _np.array(raw_probs)
        flattened = _np.reshape(
            prob, (prob.shape[0], prob.shape[1] * prob.shape[2]))

        if inputs_only:
            return {"output": flattened}
        return {"loss": _np.array(seq_loss), "output": flattened}
Example #18
0
    def __init__(self, config, net_params):
        """
        Create a dedicated graph and session and build the style transfer
        graph inside it.

        Parameters
        ----------
        config: Dictionary
            Configuration; every value is converted to numpy in place and
            ``config['st_training']`` decides whether the training graph is
            defined.
        net_params: Dictionary
            Network parameters; every value is converted to numpy in place
            and then handed to ``init_style_transfer_graph``.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        # Both dictionaries arrive holding shared float arrays; replace each
        # value with numpy, parameters first and configuration second.
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(net_params.keys()):
            net_params[name] = to_numpy(net_params[name])
        for name in list(config.keys()):
            config[name] = to_numpy(config[name])

        self.st_graph = _tf.Graph()
        self._batch_size = 1
        self._finetune_all_params = True
        self._define_training_graph = bool(config['st_training'])

        # The session is bound to the private graph, which is made the
        # default while its ops are being created.
        self.sess = _tf.Session(graph=self.st_graph)
        with self.st_graph.as_default():
            self.init_style_transfer_graph(net_params)
 def train(self, feed_dict):
     """
     Run one optimization step of the style transfer model.

     Parameters
     ----------
     feed_dict: Dictionary
         Holds ``input``, ``index`` and ``labels`` (fed as the style
         images); every value is converted to numpy in place.

     Returns
     -------
     result: Dictionary
         The loss value under ``loss``.
     """
     to_numpy = _utils.convert_shared_float_array_to_numpy
     for name in list(feed_dict.keys()):
         feed_dict[name] = to_numpy(feed_dict[name])

     session_feed = {
         self.tf_input: feed_dict['input'],
         self.tf_index: feed_dict['index'],
         self.tf_style: feed_dict['labels'],
     }
     fetched = self.sess.run([self.optimizer, self.loss],
                             feed_dict=session_feed)
     # The first fetch is the optimizer op; only the loss is returned.
     return {"loss": _np.array(fetched[1])}
    def predict(self, feed_dict):
        """
        Predict on validation data (inputs, labels and weights) or on test
        data (inputs only).

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and, for validation, ``labels`` and ``weights``.
            Every value is converted to numpy, has axis 1 squeezed out and
            is reshaped to three dimensions, in place.

        Returns
        -------
        result: Dictionary
            ``output`` (probabilities with their last two axes flattened)
            and, when ``labels`` is present, ``loss`` (weighted loss summed
            over axis 1).
        """
        # Convert input
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            batch = _np.squeeze(to_numpy(feed_dict[name]), axis=1)
            target_shape = (batch.shape[0], batch.shape[1], batch.shape[2])
            feed_dict[name] = _np.reshape(batch, target_shape)

        # Generate predictions
        prob = self.model.predict(feed_dict['input'])
        flattened = _np.reshape(
            prob, (prob.shape[0], prob.shape[1] * prob.shape[2]))
        result = {"output": flattened}

        # Without labels there is nothing to compute a loss against.
        if "labels" not in feed_dict:
            return result

        keras = _lazy_import_tensorflow().keras
        one_hot = keras.utils.to_categorical(feed_dict['labels'],
                                             num_classes=self.num_classes)

        loss_tensor = self.model.loss(y_true=one_hot, y_pred=prob)
        loss_values = keras.backend.get_value(loss_tensor)

        # Weight each loss entry, then sum along axis 1.
        sample_weights = feed_dict["weights"].reshape(loss_values.shape)
        result["loss"] = _np.sum(loss_values * sample_weights, axis=1)

        return result
    def __init__(self, net_params, batch_size, num_classes):
        """
        Create a dedicated graph and session and build the drawing
        classifier graph inside it.

        Parameters
        ----------
        net_params: Dictionary
            Network parameters; every value is converted to numpy in place
            and then handed to ``init_drawing_classifier_graph``.
        batch_size, num_classes:
            Stored on the instance under the same names.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        # Parameters arrive as shared float arrays; replace each with numpy.
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(net_params.keys()):
            net_params[name] = to_numpy(net_params[name])

        self.dc_graph = _tf.Graph()
        self.num_classes = num_classes
        self.batch_size = batch_size

        # The session is bound to the private graph, which is made the
        # default while its ops are being created.
        self.sess = _tf.Session(graph=self.dc_graph)
        with self.dc_graph.as_default():
            self.init_drawing_classifier_graph(net_params)
    def train(self, feed_dict):
        """
        Train the Keras model on one batch of inputs, labels and weights.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input``, ``labels`` and ``weights``. Every value is
            converted to numpy, has axis 1 squeezed out and is reshaped to
            three dimensions, in place.

        Returns
        -------
        result: Dictionary
            ``loss`` from ``train_on_batch`` and ``output`` (probabilities
            predicted after the update, last two axes flattened).
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            batch = _np.squeeze(to_numpy(feed_dict[name]), axis=1)
            target_shape = (batch.shape[0], batch.shape[1], batch.shape[2])
            feed_dict[name] = _np.reshape(batch, target_shape)

        keras = _lazy_import_tensorflow().keras
        one_hot = keras.utils.to_categorical(feed_dict['labels'],
                                             num_classes=self.num_classes)
        # NOTE(review): the second dimension is hard-coded to 20 —
        # presumably the number of predictions per sequence; confirm.
        sample_weights = _np.reshape(feed_dict['weights'],
                                     (self.batch_size, 20))
        batch_loss = self.model.train_on_batch(
            x=feed_dict['input'],
            y=one_hot,
            sample_weight=sample_weights)

        # Predictions are computed after the weights have been updated.
        prob = self.model.predict(feed_dict['input'])
        flattened = _np.reshape(
            prob, (prob.shape[0], prob.shape[1] * prob.shape[2]))

        return {"loss": _np.array(batch_loss), "output": _np.array(flattened)}
Example #23
0
    def train(self, feed_dict):
        """
        Run one optimization step on a batch of inputs, labels and weights.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input``, ``labels`` and ``weights``; every value is
            converted to numpy in place.

        Returns
        -------
        result: Dictionary
            ``loss`` and ``output`` arrays from the session run.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        session_feed = {
            self.input: feed_dict["input"],
            self.labels: feed_dict["labels"],
            self.weights: feed_dict["weights"],
        }
        fetches = [self.optimizer, self.cost, self.predictions]
        _, batch_loss, batch_output = self.sess.run(fetches,
                                                    feed_dict=session_feed)

        return {
            "loss": _np.array(batch_loss),
            "output": _np.array(batch_output),
        }
    def predict(self, feed_dict):
        """
        Stylize a batch of images using the model's own graph.

        Parameters
        ----------
        feed_dict: Dictionary
            Holds ``input`` and ``index``; every value is converted to numpy
            in place.

        Returns
        -------
        result: Dictionary
            The stylized images, cropped to the input height and width,
            under ``output``.
        """
        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in list(feed_dict.keys()):
            feed_dict[name] = to_numpy(feed_dict[name])

        session_feed = {
            self.tf_input: feed_dict["input"],
            self.tf_index: feed_dict["index"],
        }
        with self.st_graph.as_default():
            fetched = self.sess.run(fetches=[self.output],
                                    feed_dict=session_feed)

        stylized_raw = _np.array(fetched)

        input_shape = feed_dict["input"].shape
        expected_height = input_shape[1]
        expected_width = input_shape[2]

        # Crop to remove added padding, then drop the axis introduced by
        # fetching a one-element list.
        cropped = stylized_raw[:, :, 0:expected_height, 0:expected_width, :]
        return {"output": _np.array(cropped[0])}
    def loss_layer(self, predict, labels):
        """
        Build the scalar training loss.

        The result is the sum of four terms: a squared-error term on box
        centres, a Huber term on log-space box sizes, a weighted sigmoid
        cross-entropy term on confidence and a softmax cross-entropy term on
        class scores.

        Parameters
        ----------
        predict: TensorFlow Tensor
            The predicted values for the batch of data. Reshaped here to
            [-1, *self.grid_shape, self.num_anchors, 5 + self.num_classes].
        labels: TensorFlow Tensor
            Ground truth labels for the batch of data. The last axis is read
            at offsets 0:2 (xy), 2:4 (wh), 4 (confidence) and 5: (classes).

        Returns
        -------
        loss: TensorFlow Tensor
            Loss (combination of regression and classification losses)
        """
        _tf = _lazy_import_tensorflow()

        # Thresholds on the anchor-shape IOU used to build the object and
        # no-object masks below.
        POS_IOU = 0.7
        NEG_IOU = 0.3

        def read_config(name):
            return _utils.convert_shared_float_array_to_numpy(
                self.config.get(name))

        rescore = int(read_config("od_rescore"))
        lmb_coord_xy = read_config("lmb_coord_xy")
        lmb_coord_wh = read_config("lmb_coord_wh")
        lmb_obj = read_config("lmb_obj")
        lmb_noobj = read_config("lmb_noobj")
        lmb_class = read_config("lmb_class")

        # Split the network output into its per-anchor components
        pred_shape = ([-1] + list(self.grid_shape) +
                      [self.num_anchors, 5 + self.num_classes])
        ypred = _tf.reshape(predict, pred_shape)
        raw_xy = ypred[..., 0:2]
        raw_wh = ypred[..., 2:4]
        raw_conf = ypred[..., 4]
        class_scores = ypred[..., 5:]

        tf_anchors = _tf.constant(self.anchors)

        # Split the labels the same way
        gt_xy = labels[..., 0:2]
        gt_wh = labels[..., 2:4]
        gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
        gt_conf = labels[..., 4]
        gt_class = labels[..., 5:]

        # Decoded predictions and their corner coordinates
        xy = _tf.sigmoid(raw_xy)
        wh = _tf.exp(raw_wh) * tf_anchors
        # exp(0) == 1, so this is the anchor sizes broadcast to raw_wh's shape
        wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
        pred_lo = xy - wh / 2
        pred_hi = xy + wh / 2

        gt_area = gt_wh[..., 0] * gt_wh[..., 1]
        gt_lo = gt_xy - gt_wh / 2
        gt_hi = gt_xy + gt_wh / 2

        # IOU between bare anchor shapes and ground-truth shapes (positions
        # are not involved, only widths and heights)
        c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
        c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
        c_inter_area = c_inter[..., 0] * c_inter[..., 1]
        c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

        # IOU between decoded predicted boxes and ground-truth boxes
        overlap = _tf.maximum(
            _tf.minimum(pred_hi, gt_hi) - _tf.maximum(pred_lo, gt_lo), 0)
        pred_area = wh[..., 0] * wh[..., 1]
        overlap_area = overlap[..., 0] * overlap[..., 1]
        iou = overlap_area / (pred_area + gt_area - overlap_area)

        # Object mask: ground-truth confidence is exactly 1 AND the anchor
        # either has the largest anchor-shape IOU along axis 3 or exceeds
        # POS_IOU.
        has_gt = _tf.equal(gt_conf, _tf.constant(1.0))
        best_iou = _tf.reduce_max(c_iou, 3, keepdims=True)
        is_best = _tf.equal(c_iou, best_iou)
        is_above = c_iou > POS_IOU
        is_obj = _tf.math.logical_and(has_gt,
                                      _tf.math.logical_or(is_best, is_above))
        kr_obj_ij = _tf.stop_gradient(_tf.cast(is_obj, dtype=_tf.float32))

        # No-object mask: anchor-shape IOU below NEG_IOU and not an object
        is_below = c_iou < NEG_IOU
        is_noobj = _tf.math.logical_and(is_below,
                                        _tf.math.logical_not(is_obj))
        kr_noobj_ij = _tf.stop_gradient(_tf.cast(is_noobj, dtype=_tf.float32))

        # Number of object anchors, padded so the divisions below are safe
        # when the batch contains none
        eps_count = _tf.reduce_sum(kr_obj_ij) + _tf.constant(1e-4)

        scale_conf = 1 / (self.batch_size * self.grid_shape[0] *
                          self.grid_shape[1])

        # Trailing axis lets the mask broadcast over the 2-wide xy/wh tensors
        kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1)

        # With rescoring the confidence target is the (gradient-stopped)
        # predicted-box IOU instead of 1
        obj_gt_conf = kr_obj_ij
        if rescore:
            obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou)

        obj_w = kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj

        xy_error = _tf.reduce_sum(kr_obj_ij_plus1 * _tf.square(gt_xy - xy))
        loss_xy = lmb_coord_xy * xy_error / eps_count

        loss_wh = _tf.losses.huber_loss(
            labels=gt_raw_wh,
            predictions=raw_wh,
            weights=lmb_coord_wh * kr_obj_ij_plus1,
            delta=1.0,
        )

        conf_xent = _tf.nn.sigmoid_cross_entropy_with_logits(
            labels=obj_gt_conf, logits=raw_conf)
        loss_conf = scale_conf * _tf.reduce_sum(obj_w * conf_xent)

        class_xent = _tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=gt_class, logits=class_scores)
        loss_cls = (lmb_class * _tf.reduce_sum(kr_obj_ij * class_xent) /
                    eps_count)

        return _tf.add_n([loss_xy, loss_wh, loss_conf, loss_cls])
Example #26
0
    def __init__(self, net_params, batch_size, num_classes):
        """
        Build the TensorFlow graph (three conv/pool stages followed by two
        dense layers), its loss and optimizer, open a session, and assign
        the supplied weights to the graph variables.

        Parameters
        ----------
        net_params: dict
            Initial weights keyed by variable name
            ('drawing_conv0_weight', 'drawing_conv0_bias', ...). Each value
            is converted with ``_utils.convert_shared_float_array_to_numpy``
            and written back into this same dict.
        batch_size: int
            Stored on the instance as ``self.batch_size``.
        num_classes: int
            Width of the final dense layer and of the one-hot label
            placeholder.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in net_params.keys():
            net_params[name] = to_numpy(net_params[name])

        _tf.reset_default_graph()

        self.num_classes = num_classes
        self.batch_size = batch_size

        self.input = _tf.placeholder(_tf.float32, [None, 28, 28, 1])
        self.one_hot_labels = _tf.placeholder(_tf.int32,
                                              [None, self.num_classes])

        # Variables are created zero-filled under explicit names; the real
        # values are assigned by name once the session exists.
        weight_shapes = [
            ('drawing_conv0_weight', [3, 3, 1, 16]),
            ('drawing_conv1_weight', [3, 3, 16, 32]),
            ('drawing_conv2_weight', [3, 3, 32, 64]),
            ('drawing_dense0_weight', [576, 128]),
            ('drawing_dense1_weight', [128, self.num_classes]),
        ]
        bias_shapes = [
            ('drawing_conv0_bias', [16]),
            ('drawing_conv1_bias', [32]),
            ('drawing_conv2_bias', [64]),
            ('drawing_dense0_bias', [128]),
            ('drawing_dense1_bias', [self.num_classes]),
        ]
        weights = {}
        for var_name, var_shape in weight_shapes:
            weights[var_name] = _tf.Variable(_tf.zeros(var_shape),
                                             name=var_name)
        biases = {}
        for var_name, var_shape in bias_shapes:
            biases[var_name] = _tf.Variable(_tf.zeros(var_shape),
                                            name=var_name)

        # Three identical stages: 3x3 SAME conv -> bias -> ReLU -> 2x2 VALID
        # max-pool with stride 2 (28 -> 14 -> 7 -> 3 spatially).
        features = self.input
        for stage in range(3):
            features = _tf.nn.conv2d(
                features,
                weights["drawing_conv%d_weight" % stage],
                strides=1,
                padding='SAME')
            features = _tf.nn.bias_add(features,
                                       biases["drawing_conv%d_bias" % stage])
            features = _tf.nn.relu(features)
            features = _tf.nn.max_pool2d(features,
                                         ksize=[1, 2, 2, 1],
                                         strides=[1, 2, 2, 1],
                                         padding='VALID')

        # Flatten the 3x3x64 feature map to 576 values per example
        flat = _tf.reshape(features, (-1, 576))

        hidden = _tf.nn.xw_plus_b(flat,
                                  weights=weights["drawing_dense0_weight"],
                                  biases=biases["drawing_dense0_bias"])
        hidden = _tf.nn.relu(hidden)

        logits = _tf.nn.xw_plus_b(hidden,
                                  weights=weights["drawing_dense1_weight"],
                                  biases=biases["drawing_dense1_bias"])
        self.predictions = _tf.nn.softmax(logits)

        # Per-example loss (no reduction applied)
        self.cost = _tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=self.one_hot_labels,
            reduction=_tf.losses.Reduction.NONE)

        adam = _tf.train.AdamOptimizer(learning_rate=0.001)
        self.optimizer = adam.minimize(self.cost)

        # Built into the graph but not referenced again in this constructor
        correct_prediction = _tf.equal(_tf.argmax(self.predictions, 1),
                                       _tf.argmax(self.one_hot_labels, 1))

        self.sess = _tf.Session()
        self.sess.run(_tf.global_variables_initializer())

        # Assign the supplied initial weights to the graph variables
        layers = [
            'drawing_conv0_weight', 'drawing_conv0_bias',
            'drawing_conv1_weight', 'drawing_conv1_bias',
            'drawing_conv2_weight', 'drawing_conv2_bias',
            'drawing_dense0_weight', 'drawing_dense0_bias',
            'drawing_dense1_weight', 'drawing_dense1_bias'
        ]

        for key in layers:
            if 'bias' in key:
                value = net_params[key]
            elif 'drawing_dense0_weight' in key:
                # To make output of CoreML pool3 (NCHW) compatible with TF
                # (NHWC): decompose the FC weights to NCHW, transpose to
                # NHWC, reshape back to FC, then swap to (in, out).
                as_nchw = _np.reshape(net_params[key], (128, 64, 3, 3))
                as_nhwc = _np.transpose(as_nchw, (0, 2, 3, 1))
                as_fc = _np.reshape(as_nhwc, (128, 576))
                value = _np.transpose(as_fc, (1, 0))
            elif 'dense' in key:
                value = _utils.convert_dense_coreml_to_tf(net_params[key])
            else:
                # TODO: Call _utils.convert_conv2d_coreml_to_tf when #2513 is merged
                value = _np.transpose(net_params[key], (2, 3, 1, 0))

            variable = _tf.get_default_graph().get_tensor_by_name(key + ":0")
            self.sess.run(_tf.assign(variable, value))
    def __init__(
        self,
        net_params,
        batch_size,
        num_features,
        num_classes,
        prediction_window,
        seq_len,
        seed,
    ):
        """
        Build and compile the Keras model (Conv1D -> Dropout -> LSTM ->
        Dense -> BatchNorm -> ReLU -> Dropout -> Dense -> Softmax), then
        load the supplied weights into its layers.

        Parameters
        ----------
        net_params: dict
            Initial weights keyed by parameter name ("conv_weight",
            "lstm_i2h_i_weight", "bn_gamma", ...). Each value is converted
            with ``_utils.convert_shared_float_array_to_numpy`` and written
            back into this same dict.
        batch_size: int
            Stored on the instance as ``self.batch_size``.
        num_features: int
            Size of the last axis of the model input.
        num_classes: int
            Number of units in the final dense layer.
        prediction_window: int
            Kernel size and stride of the Conv1D layer.
        seq_len: int
            The model input has ``prediction_window * seq_len`` time steps.
        seed: int
            Seed passed to both Dropout layers.
        """
        _utils.suppress_tensorflow_warnings()

        self.num_classes = num_classes
        self.batch_size = batch_size

        tf = _lazy_import_tensorflow()
        keras = tf.keras

        #############################################
        # Define the Neural Network
        #############################################
        inputs = keras.Input(shape=(prediction_window * seq_len, num_features))

        # First dense layer: a Conv1D whose stride equals its kernel size
        conv = keras.layers.Conv1D(
            filters=CONV_H,
            kernel_size=(prediction_window),
            padding='same',
            strides=prediction_window,
            use_bias=True,
            activation='relu',
        )
        cur_outputs = conv(inputs)

        # First dropout layer
        first_dropout = keras.layers.Dropout(
            rate=0.2,
            seed=seed,
        )
        cur_outputs = first_dropout(cur_outputs)

        # LSTM layer
        lstm = keras.layers.LSTM(
            units=LSTM_H,
            return_sequences=True,
            use_bias=True,
        )
        cur_outputs = lstm(cur_outputs)

        # Second dense layer
        dense_hidden = keras.layers.Dense(DENSE_H)
        cur_outputs = dense_hidden(cur_outputs)

        # Batch norm layer
        batch_norm = keras.layers.BatchNormalization()
        cur_outputs = batch_norm(cur_outputs)

        # ReLU layer
        relu = keras.layers.ReLU()
        cur_outputs = relu(cur_outputs)

        # Final dropout layer
        final_dropout = keras.layers.Dropout(rate=0.5, seed=seed)
        cur_outputs = final_dropout(cur_outputs)

        # Final dense layer
        dense_out = keras.layers.Dense(num_classes, use_bias=False)
        cur_outputs = dense_out(cur_outputs)

        # Softmax layer
        softmax = keras.layers.Softmax()
        cur_outputs = softmax(cur_outputs)

        self.model = keras.Model(inputs=inputs, outputs=cur_outputs)
        self.model.compile(loss=tf.losses.categorical_crossentropy,
                           optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                           sample_weight_mode="temporal")

        #############################################
        # Load the Weights of the Neural Network
        #############################################
        for key in net_params.keys():
            net_params[key] = _utils.convert_shared_float_array_to_numpy(
                net_params[key])

        # Weights are set on the layer objects created above rather than via
        # positional lookups in self.model.layers, so the loading code cannot
        # drift out of step with the layer order.

        # Set weight for first dense layer
        conv.set_weights(
            (_utils.convert_conv1d_coreml_to_tf(net_params["conv_weight"]),
             net_params["conv_bias"]))

        # Set LSTM weights: stack the per-gate blocks in i, f, c, o order and
        # swap each weight matrix's two axes.
        # NOTE(review): only the h2h bias entries are read; no i2h bias is
        # loaded here -- confirm this is intended.
        gates = ('i', 'f', 'c', 'o')
        i2h = _np.concatenate(
            [net_params["lstm_i2h_%s_weight" % gate] for gate in gates],
            axis=0)
        h2h = _np.concatenate(
            [net_params["lstm_h2h_%s_weight" % gate] for gate in gates],
            axis=0)
        bias = _np.concatenate(
            [net_params["lstm_h2h_%s_bias" % gate] for gate in gates], axis=0)
        i2h = _np.swapaxes(i2h, 1, 0)
        h2h = _np.swapaxes(h2h, 1, 0)
        lstm.set_weights((i2h, h2h, bias))

        # Set weight for second dense layer
        dense_hidden.set_weights(
            (net_params['dense0_weight'].reshape(DENSE_H,
                                                 LSTM_H).swapaxes(0, 1),
             net_params['dense0_bias']))

        # Set batch Norm weights
        batch_norm.set_weights(
            (net_params['bn_gamma'], net_params['bn_beta'],
             net_params['bn_running_mean'], net_params['bn_running_var']))

        # Set weights for last dense layer (created without a bias)
        dense_out.set_weights((net_params['dense1_weight'].reshape(
            (self.num_classes, DENSE_H)).swapaxes(0, 1), ))
    def __init__(self, net_params, batch_size, num_features, num_classes,
                 prediction_window, seq_len):
        """
        Build the TensorFlow graph (conv1d -> dropout -> LSTM -> dense ->
        batch norm -> ReLU -> dropout -> dense -> softmax), its weighted
        loss and training op, open a session, and load the supplied weights.

        Parameters
        ----------
        net_params: dict
            Initial weights keyed by parameter name. Each value is converted
            with ``_utils.convert_shared_float_array_to_numpy`` and written
            back into this same dict.
        batch_size: int
            Used to reshape the weight/target placeholders and to size the
            LSTM's initial state.
        num_features: int
            Size of the last axis of the data placeholder.
        num_classes: int
            Depth of the one-hot targets and width of the output layer.
        prediction_window: int
            Kernel width and stride of the 1-D convolution.
        seq_len: int
            Number of steps per sequence in the weight and target
            placeholders.
        """
        self.gpu_policy = _utils.TensorFlowGPUPolicy()
        self.gpu_policy.start()

        to_numpy = _utils.convert_shared_float_array_to_numpy
        for name in net_params.keys():
            net_params[name] = to_numpy(net_params[name])

        _tf.reset_default_graph()

        self.num_classes = num_classes
        self.batch_size = batch_size
        self.seq_len = seq_len

        # Placeholders
        self.data = _tf.placeholder(
            _tf.float32, [None, prediction_window * seq_len, num_features])
        self.weight = _tf.placeholder(_tf.float32, [None, seq_len, 1])
        self.target = _tf.placeholder(_tf.int32, [None, seq_len, 1])
        self.is_training = _tf.placeholder(_tf.bool)

        # Drop the trailing singleton axis of the weights and targets, then
        # one-hot encode the targets
        per_step_shape = [self.batch_size, seq_len]
        reshaped_weight = _tf.reshape(self.weight, per_step_shape)
        reshaped_target = _tf.reshape(self.target, per_step_shape)
        one_hot_target = _tf.one_hot(reshaped_target,
                                     depth=self.num_classes,
                                     axis=-1)

        # Zero-filled variables; values are filled in by load_weights() at
        # the end of this constructor
        self.weights = {
            'conv_weight':
            _tf.Variable(_tf.zeros([prediction_window, num_features, CONV_H]),
                         name='conv_weight'),
            'dense0_weight':
            _tf.Variable(_tf.zeros([LSTM_H, DENSE_H]), name='dense0_weight'),
            'dense1_weight':
            _tf.Variable(_tf.zeros([DENSE_H, self.num_classes]),
                         name='dense1_weight'),
        }
        self.biases = {
            'conv_bias':
            _tf.Variable(_tf.zeros([CONV_H]), name='conv_bias'),
            'dense0_bias':
            _tf.Variable(_tf.zeros([DENSE_H]), name='dense0_bias'),
            'dense1_bias':
            _tf.Variable(_tf.zeros([num_classes]), name='dense1_bias'),
        }

        # Convolution whose stride equals its kernel width
        conv = _tf.nn.conv1d(self.data,
                             self.weights['conv_weight'],
                             stride=prediction_window,
                             padding='SAME')
        conv = _tf.nn.relu(_tf.nn.bias_add(conv, self.biases['conv_bias']))

        conv_dropped = _tf.layers.dropout(conv,
                                          rate=0.2,
                                          training=self.is_training)

        # Long Short-Term Memory; the cell's initializer is a constant built
        # from whatever load_lstm_weights_params() returns
        lstm_init_value = self.load_lstm_weights_params(net_params)
        lstm_initializer = _tf.initializers.constant(lstm_init_value,
                                                     verify_shape=True)
        cells = _tf.nn.rnn_cell.LSTMCell(num_units=LSTM_H,
                                         reuse=_tf.AUTO_REUSE,
                                         forget_bias=0.0,
                                         initializer=lstm_initializer)
        init_state = cells.zero_state(batch_size, _tf.float32)
        rnn_outputs, _ = _tf.nn.dynamic_rnn(cells,
                                            conv_dropped,
                                            initial_state=init_state)

        # Dense + batch norm, applied to every time step as a flat batch
        hidden = _tf.reshape(rnn_outputs, (-1, LSTM_H))
        hidden = _tf.add(_tf.matmul(hidden, self.weights['dense0_weight']),
                         self.biases['dense0_bias'])

        def constant_init(param_name):
            return _tf.initializers.constant(net_params[param_name],
                                             verify_shape=True)

        hidden = _tf.layers.batch_normalization(
            inputs=hidden,
            beta_initializer=constant_init('bn_beta'),
            gamma_initializer=constant_init('bn_gamma'),
            moving_mean_initializer=constant_init('bn_running_mean'),
            moving_variance_initializer=constant_init('bn_running_var'),
            training=self.is_training)
        hidden = _tf.nn.relu(hidden)
        hidden = _tf.layers.dropout(hidden,
                                    rate=0.5,
                                    training=self.is_training)

        # Output
        logits = _tf.add(_tf.matmul(hidden, self.weights['dense1_weight']),
                         self.biases['dense1_bias'])
        logits = _tf.reshape(logits, (-1, self.seq_len, self.num_classes))
        self.probs = _tf.nn.softmax(logits)

        # Total weight per sequence, and the number of sequences whose total
        # weight is positive
        seq_sum_weights = _tf.reduce_sum(reshaped_weight, axis=1)
        has_weight = _tf.cast(seq_sum_weights > 0, dtype=_tf.float32)
        binary_seq_sum_weights = _tf.reduce_sum(has_weight)

        # Weighted per-step loss, normalised first per sequence and then
        # across the sequences counted above; 1e-5 guards both divisions
        step_loss = _tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=one_hot_target,
            weights=reshaped_weight,
            reduction=_tf.losses.Reduction.NONE)
        self.loss_per_seq = (_tf.reduce_sum(step_loss, axis=1) /
                             (seq_sum_weights + 1e-5))
        self.loss_op = (_tf.reduce_sum(self.loss_per_seq) /
                        (binary_seq_sum_weights + 1e-5))

        # Optimizer: group the UPDATE_OPS collected so far with the
        # minimisation step so both run together
        update_ops = _tf.get_collection(_tf.GraphKeys.UPDATE_OPS)
        self.set_learning_rate(1e-3)
        minimize_op = self.optimizer.minimize(self.loss_op)
        self.train_op = _tf.group([minimize_op, update_ops])

        # Session
        self.sess = _tf.Session()

        # Initialize all variables
        self.sess.run(_tf.global_variables_initializer())
        self.sess.run(_tf.local_variables_initializer())

        self.load_weights(net_params)
Example #29
0
    def loss_layer(self, predict, labels):
        """
        Build the scalar training loss.

        The result is the sum of four terms: a squared-error term on box
        centres, a Huber term on log-space box sizes, a weighted sigmoid
        cross-entropy term on confidence and a softmax cross-entropy term on
        class scores.

        Parameters
        ----------
        predict: TensorFlow Tensor
            The predicted values for the batch of data. Reshaped here to
            [-1, *self.grid_shape, self.num_anchors, 5 + self.num_classes].
        labels: TensorFlow Tensor
            Ground truth labels for the batch of data. The last axis is read
            at offsets 0:2 (xy), 2:4 (wh), 4 (confidence) and 5: (classes).

        Returns
        -------
        loss: TensorFlow Tensor
            Loss (combination of regression and classification losses)
        """

        def read_config(name):
            return _utils.convert_shared_float_array_to_numpy(
                self.config.get(name))

        rescore = int(read_config('od_rescore'))
        lmb_coord_xy = read_config('lmb_coord_xy')
        lmb_coord_wh = read_config('lmb_coord_wh')
        lmb_obj = read_config('lmb_obj')
        lmb_noobj = read_config('lmb_noobj')
        lmb_class = read_config('lmb_class')

        # Split the network output into its per-anchor components
        pred_shape = ([-1] + list(self.grid_shape) +
                      [self.num_anchors, 5 + self.num_classes])
        ypred = _tf.reshape(predict, pred_shape)
        raw_xy = ypred[..., 0:2]
        raw_wh = ypred[..., 2:4]
        raw_conf = ypred[..., 4]
        class_scores = ypred[..., 5:]

        tf_anchors = _tf.constant(self.anchors)

        # Split the labels the same way
        gt_xy = labels[..., 0:2]
        gt_wh = labels[..., 2:4]
        gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
        gt_conf = labels[..., 4]
        # Confidence of the first entry along the second-to-last axis only;
        # its sum is the normaliser for the xy and class losses
        gt_conf0 = labels[..., 0:1, 4]
        gt_class = labels[..., 5:]

        # Decoded predictions and their corner coordinates
        xy = _tf.sigmoid(raw_xy)
        wh = _tf.exp(raw_wh) * tf_anchors
        # exp(0) == 1, so this is the anchor sizes broadcast to raw_wh's shape
        wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
        pred_lo = xy - wh / 2
        pred_hi = xy + wh / 2

        gt_area = gt_wh[..., 0] * gt_wh[..., 1]
        gt_lo = gt_xy - gt_wh / 2
        gt_hi = gt_xy + gt_wh / 2

        # IOU between bare anchor shapes and ground-truth shapes
        c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
        c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
        c_inter_area = c_inter[..., 0] * c_inter[..., 1]
        c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

        # IOU between decoded predicted boxes and ground-truth boxes
        overlap = _tf.maximum(
            _tf.minimum(pred_hi, gt_hi) - _tf.maximum(pred_lo, gt_lo), 0)
        pred_area = wh[..., 0] * wh[..., 1]
        overlap_area = overlap[..., 0] * overlap[..., 1]
        iou = overlap_area / (pred_area + gt_area - overlap_area)

        # Responsible anchor: the one with the largest anchor-shape IOU
        # along axis 3
        best_iou = _tf.reduce_max(c_iou, 3, keepdims=True)
        resp_box = _tf.cast(_tf.equal(c_iou, best_iou), dtype=_tf.float32)
        count = _tf.reduce_sum(gt_conf0)

        # Object mask and its complement
        kr_obj_ij = _tf.stop_gradient(resp_box * gt_conf)
        kr_noobj_ij = 1 - kr_obj_ij

        conf_scale = 1 / (self.batch_size * self.grid_shape[0] *
                          self.grid_shape[1])

        # Trailing axis lets the mask broadcast over the 2-wide xy/wh tensors
        kr_box = _tf.expand_dims(kr_obj_ij, -1)

        # With rescoring the confidence target is the (gradient-stopped)
        # predicted-box IOU instead of 1
        obj_gt_conf = kr_obj_ij
        if rescore:
            obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou)

        obj_w = (kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj)

        xy_error = _tf.reduce_sum(kr_box * _tf.square(gt_xy - xy))
        loss_xy = lmb_coord_xy * xy_error / (count + 0.01)

        loss_wh = _tf.losses.huber_loss(
            labels=gt_raw_wh,
            predictions=raw_wh,
            weights=lmb_coord_wh * kr_box,
            delta=1.0,
        )

        # Confidence loss
        conf_xent = _tf.nn.sigmoid_cross_entropy_with_logits(
            labels=obj_gt_conf, logits=raw_conf)
        loss_conf = conf_scale * _tf.reduce_sum(obj_w * conf_xent)

        # Classification loss
        class_xent = _tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=gt_class, logits=class_scores)
        loss_cls = (lmb_class * _tf.reduce_sum(kr_obj_ij * class_xent) /
                    (count + 0.01))

        return _tf.add_n([loss_xy, loss_wh, loss_conf, loss_cls])
def get_augmented_images(images, output_shape):
    """
    Build a randomly augmented version of every image in a batch, together
    with the geometric transformation applied to it.

    Each image is randomly rescaled, padded via ``pad_to_ensure_size``,
    randomly cropped to ``output_shape``, optionally mirrored horizontally,
    colour-jittered (hue, brightness, saturation, contrast) and finally
    clipped to the range [0, 1].

    Parameters
    ----------
    images: sequence
        Input images. Each one is converted with
        ``_utils.convert_shared_float_array_to_numpy`` and is unpacked as
        (height, width, channels).
    output_shape: sequence
        ``output_shape[0]`` is the output height and ``output_shape[1]`` the
        output width.

    Returns
    -------
    augmented_images: list
        One TensorFlow tensor per input image.
    transformations: list
        One 3x3 TensorFlow tensor per input image holding the scale, flip
        sign and translation used for that image.
    """
    # Store transformations and augmented_images for the input batch
    transformations = []
    augmented_images = []

    # Augmentation options
    min_scale = 1 / 1.5
    max_scale = 1.5
    max_aspect_ratio = 1.5
    max_hue = 0.05
    max_brightness = 0.05
    max_saturation = 1.25
    max_contrast = 1.25
    horizontal_flip = True

    out_height, out_width = output_shape[0], output_shape[1]
    log_aspect = np.log(max_aspect_ratio)

    for image in images:
        image = _utils.convert_shared_float_array_to_numpy(image)

        # Random scale; the width scale additionally gets a random aspect
        # ratio change drawn log-uniformly
        height, width, _ = tf.unstack(tf.shape(image))
        scale_h = tf.random_uniform([], minval=min_scale, maxval=max_scale)
        scale_w = scale_h * tf.exp(
            tf.random_uniform([], minval=-log_aspect, maxval=log_aspect))
        new_height = tf.to_int32(tf.to_float(height) * scale_h)
        new_width = tf.to_int32(tf.to_float(width) * scale_w)

        image_scaled = tf.squeeze(
            tf.image.resize_bilinear(tf.expand_dims(image, 0),
                                     [new_height, new_width]), [0])

        # Image padding
        pad_image, pad_offset = pad_to_ensure_size(image_scaled, out_height,
                                                   out_width)

        new_height = tf.maximum(out_height, new_height)
        new_width = tf.maximum(out_width, new_width)

        # Random crop window of the requested output size
        slice_offset = (
            tf.random_uniform([],
                              minval=0,
                              maxval=new_height - out_height + 1,
                              dtype=tf.int32),
            tf.random_uniform([],
                              minval=0,
                              maxval=new_width - out_width + 1,
                              dtype=tf.int32),
        )
        cropped_image = array_ops.slice(
            pad_image, [slice_offset[0], slice_offset[1], 0],
            [out_height, out_width, 3])

        if horizontal_flip:
            uniform_random = random_ops.random_uniform([], 0, 1.0)
            did_horiz_flip = math_ops.less(uniform_random, .5)
            augmented_image = control_flow_ops.cond(
                did_horiz_flip,
                lambda: array_ops.reverse(cropped_image, [1]),
                lambda: cropped_image)
            # +1 when not flipped, -1 when flipped
            flip_sign = 1 - tf.to_float(did_horiz_flip) * 2
        else:
            augmented_image = cropped_image
            flip_sign = 1
            did_horiz_flip = tf.constant(False)

        ty = tf.to_float(pad_offset[0] - slice_offset[0])
        tx = (flip_sign * tf.to_float(pad_offset[1] - slice_offset[1]) +
              tf.to_float(did_horiz_flip) * out_width)

        # Make the transformation matrix
        transformation = tf.reshape(
            tf.stack([
                scale_h, 0.0, ty,
                0.0, flip_sign * scale_w, tx,
                0.0, 0.0, 1.0,
            ]), (3, 3))

        # Colour jitter. Every step feeds the next one through
        # augmented_image so that all enabled adjustments reach the image
        # that is clipped and returned.
        if max_hue is not None and max_hue > 0:
            augmented_image = tf.image.random_hue(augmented_image,
                                                  max_delta=max_hue)

        if max_brightness is not None and max_brightness > 0:
            augmented_image = tf.image.random_brightness(
                augmented_image, max_delta=max_brightness)

        if max_saturation is not None and max_saturation > 1.0:
            log_sat = np.log(max_saturation)
            augmented_image = tf.image.random_saturation(
                augmented_image,
                lower=np.exp(-log_sat),
                upper=np.exp(log_sat))

        if max_contrast is not None and max_contrast > 1.0:
            log_con = np.log(max_contrast)
            augmented_image = tf.image.random_contrast(
                augmented_image,
                lower=np.exp(-log_con),
                upper=np.exp(log_con))

        augmented_image = tf.clip_by_value(augmented_image, 0, 1)
        augmented_images.append(augmented_image)
        transformations.append(transformation)

    return augmented_images, transformations