Example 1
def shaped_py_func(func, inputs, types, shapes, stateful=True, name=None):
    """Wrapper around tf.py_func that adds static shape information to the output.

  Args:
    func: Python function to call.
    inputs: List of input tensors.
    types: List of output tensor types.
    shapes: List of output tensor shapes.
    stateful: Whether or not the python function is stateful.
    name: Name of the op.

  Returns:
    output_tensors: List of output tensors.
  """
    output_tensors = tf.py_func(func=func,
                                inp=inputs,
                                Tout=types,
                                stateful=stateful,
                                name=name)
    for t, s in zip(output_tensors, shapes):
        t.set_shape(s)
    return output_tensors
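
For reference, a minimal usage sketch of the wrapper above (assuming TF 1.x graph mode; `my_norm` and the shapes are illustrative):

import numpy as np
import tensorflow as tf  # TF 1.x graph mode assumed

def my_norm(x):
    # Hypothetical NumPy helper: L2-normalize the rows of a float32 matrix.
    return (x / np.linalg.norm(x, axis=-1, keepdims=True)).astype(np.float32)

x = tf.placeholder(tf.float32, shape=[8, 128])
# A bare tf.py_func would return a tensor of unknown static shape;
# shaped_py_func reattaches the shape so downstream layers can rely on it.
normed, = shaped_py_func(my_norm, [x], types=[tf.float32], shapes=[[8, 128]])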
Example 2
def tf_put_text(imgs,
                texts,
                text_size=1,
                text_pos=(0, 30),
                text_color=(0, 0, 1)):
    """Adds text to an image tensor."""
    def _put_text(imgs, texts):
        """Python function that renders text onto a image."""
        result = np.empty_like(imgs)
        for i in range(imgs.shape[0]):
            text = texts[i]
            if isinstance(text, bytes):
                text = six.ensure_text(text)
            # You may need to adjust the text size and position.
            # If your images are in the [0, 255] range, replace (0, 0, 1) with (0, 0, 255).
            result[i, :, :, :] = cv2.putText(imgs[i, :, :, :], str(text),
                                             text_pos,
                                             cv2.FONT_HERSHEY_COMPLEX,
                                             text_size, text_color, 1)
        return result

    return tf.py_func(_put_text, [imgs, texts], Tout=imgs.dtype)
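
Note that `tf.py_func` discards static shape information, so callers typically restore it; a hedged usage sketch (the placeholder shapes are illustrative):

imgs = tf.placeholder(tf.float32, [None, 256, 256, 3])  # RGB images in [0, 1]
texts = tf.placeholder(tf.string, [None])               # one caption per image
annotated = tf_put_text(imgs, texts)
# Reattach the shape that py_func dropped.
annotated.set_shape(imgs.shape)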
Example 3
  def reset(self, indices=None):
    """Reset the batch of environments.

    Args:
      indices: The batch indices of the environments to reset; defaults to all.

    Returns:
      Batch tensor of the new observations.
    """
    if indices is None:
      indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    observ = tf.py_func(self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    reward = tf.zeros_like(indices, tf.float32)
    done = tf.zeros_like(indices, tf.bool)
    with tf.control_dependencies([
        tf.scatter_update(self._observ, indices, observ),
        tf.scatter_update(self._reward, indices, reward),
        tf.scatter_update(self._done, indices, done)
    ]):
      return tf.identity(observ)
Example 4
  def simulate(self, action):
    """Step the batch of environments.

    The results of the step can be accessed from the variables defined below.

    Args:
      action: Tensor holding the batch of actions to apply.

    Returns:
      Operation.
    """
    with tf.name_scope('environment/simulate'):
      if action.dtype in (tf.float16, tf.float32, tf.float64):
        action = tf.check_numerics(action, 'action')
      observ_dtype = self._parse_dtype(self._batch_env.observation_space)
      observ, reward, done = tf.py_func(lambda a: self._batch_env.step(a)[:3], [action],
                                        [observ_dtype, tf.float32, tf.bool],
                                        name='step')
      observ = tf.check_numerics(observ, 'observ')
      reward = tf.check_numerics(reward, 'reward')
      return tf.group(self._observ.assign(observ), self._action.assign(action),
                      self._reward.assign(reward), self._done.assign(done))
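
Examples 3 and 4 both call a `_parse_dtype` helper that is not shown here; a plausible method sketch, assuming the observation spaces are standard gym `Box`/`Discrete` spaces (illustrative, not the original implementation):

import gym
import tensorflow as tf

def _parse_dtype(self, space):
    """Method sketch: map a gym space to the dtype tf.py_func should return."""
    if isinstance(space, gym.spaces.Discrete):
        return tf.int32
    if isinstance(space, gym.spaces.Box):
        return tf.float32
    raise NotImplementedError('Unsupported space: {}'.format(space))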
Example 5
    def compute_gradients(self, loss, var_list, **kwargs):
        grads_and_vars = tf.train.AdamOptimizer.compute_gradients(self, loss, var_list, **kwargs)
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        flat_grad = tf.concat([tf.reshape(g, (-1,)) for g, v in grads_and_vars], axis=0)
        shapes = [v.shape.as_list() for g, v in grads_and_vars]
        sizes = [int(np.prod(s)) for s in shapes]

        num_tasks = self.comm.Get_size()
        buf = np.zeros(sum(sizes), np.float32)

        def _collect_grads(flat_grad):
            self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
            np.divide(buf, float(num_tasks), out=buf)
            return buf

        avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
        avg_flat_grad.set_shape(flat_grad.shape)
        avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
        avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
                    for g, (_, v) in zip(avg_grads, grads_and_vars)]

        return avg_grads_and_vars
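
This override assumes a class that subclasses `tf.train.AdamOptimizer` and holds an MPI communicator (`self.comm`); a minimal skeleton under that assumption (the class name here is illustrative):

from mpi4py import MPI
import tensorflow as tf

class MpiAdamOptimizer(tf.train.AdamOptimizer):
    """Adam optimizer that averages gradients across MPI workers (sketch)."""

    def __init__(self, comm=None, **kwargs):
        super(MpiAdamOptimizer, self).__init__(**kwargs)
        self.comm = MPI.COMM_WORLD if comm is None else comm

    # The compute_gradients override shown above would live here.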
Example 6
 def generate_detections(self,
                         cls_outputs,
                         box_outputs,
                         indices,
                         classes,
                         image_id,
                         image_scale,
                         image_size=None,
                         min_score_thresh=MIN_SCORE_THRESH,
                         max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
                         disable_pyfun=None,
                         nms_configs=None):
     """Generate detections based on class and box predictions."""
     if disable_pyfun:
         return _generate_detections_tf(cls_outputs,
                                        box_outputs,
                                        self._anchors.boxes,
                                        indices,
                                        classes,
                                        image_id,
                                        image_scale,
                                        image_size,
                                        min_score_thresh=min_score_thresh,
                                        max_boxes_to_draw=max_boxes_to_draw)
     else:
         logging.info('nms_configs=%s', nms_configs)
         return tf.py_func(
             functools.partial(_generate_detections,
                               nms_configs=nms_configs), [
                                   cls_outputs,
                                   box_outputs,
                                   self._anchors.boxes,
                                   indices,
                                   classes,
                                   image_id,
                                   image_scale,
                                   self._num_classes,
                                   max_boxes_to_draw,
                               ], tf.float32)
Example 7
def multiplicative_inverse(a, n):
  """Multiplicative inverse of a modulo n.

  Args:
    a: Tensor of shape [..., vocab_size]. It denotes an integer in the one-hot
      space.
    n: int Tensor of shape [...].

  Returns:
    Tensor of same shape and dtype as a.
  """
  a = tf.convert_to_tensor(a)
  n = tf.convert_to_tensor(n)
  vocab_size = a.shape[-1]
  a_dtype = a.dtype
  sparse_a = tf.argmax(a, axis=-1)
  # TODO(trandustin): Change to tf.py_function.
  sparse_outputs = tf1.py_func(
      py_multiplicative_inverse, [sparse_a, n], tf.int32)
  sparse_outputs.set_shape(sparse_a.shape)
  outputs = tf.one_hot(sparse_outputs, depth=vocab_size, dtype=a_dtype)
  return outputs
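
`py_multiplicative_inverse` is not shown in this example; a NumPy sketch based on the extended Euclidean algorithm, treating `n` as a scalar for simplicity (an assumption about the helper, not the original code):

import numpy as np

def py_multiplicative_inverse(a, n):
    """Sketch: element-wise inverse of `a` modulo a scalar `n`."""
    def inverse(a_i, n_i):
        # Extended Euclidean algorithm.
        t, new_t = 0, 1
        r, new_r = int(n_i), int(a_i) % int(n_i)
        while new_r != 0:
            q = r // new_r
            t, new_t = new_t, t - q * new_t
            r, new_r = new_r, r - q * new_r
        if r != 1:
            raise ValueError('%d has no inverse modulo %d' % (a_i, n_i))
        return t % int(n_i)
    a = np.asarray(a)
    return np.array([inverse(x, n) for x in a.flat],
                    dtype=np.int32).reshape(a.shape)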
Example 8
def get_sari(source_ids, prediction_ids, target_ids, max_gram_size=4):
    """Computes the SARI scores from the given source, prediction and targets.

  Args:
    source_ids: A 2D tf.Tensor of size (batch_size, sequence_length)
    prediction_ids: A 2D tf.Tensor of size (batch_size, sequence_length)
    target_ids: A 3D tf.Tensor of size (batch_size, number_of_targets,
        sequence_length)
    max_gram_size: int. largest n-gram size we care about (e.g. 3 for unigrams,
        bigrams, and trigrams)

  Returns:
    A 4-tuple of 1D float Tensors of size (batch_size) for the SARI score and
        the keep, addition and deletion scores.
  """
    def get_sari_numpy(source_ids, prediction_ids, target_ids):
        """Iterate over elements in the batch and call the SARI function."""
        sari_scores = []
        keep_scores = []
        add_scores = []
        deletion_scores = []
        # Iterate over elements in the batch.
        for source_ids_i, prediction_ids_i, target_ids_i in zip(
                source_ids, prediction_ids, target_ids):
            sari, keep, add, deletion = get_sari_score(
                source_ids_i, prediction_ids_i, target_ids_i, max_gram_size,
                BETA_FOR_SARI_DELETION_F_MEASURE)
            sari_scores.append(sari)
            keep_scores.append(keep)
            add_scores.append(add)
            deletion_scores.append(deletion)
        return (np.asarray(sari_scores), np.asarray(keep_scores),
                np.asarray(add_scores), np.asarray(deletion_scores))

    sari, keep, add, deletion = tf.py_func(
        get_sari_numpy, [source_ids, prediction_ids, target_ids],
        [tf.float64, tf.float64, tf.float64, tf.float64])
    return sari, keep, add, deletion
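
As in the other examples, the tensors returned by `tf.py_func` carry no static shape; if downstream code needs it, the 1-D (batch_size,) shape can be reattached before returning (a hedged addition, not part of the original):

# Could be inserted before the final return:
batch_size = source_ids.shape[0]
for scores in (sari, keep, add, deletion):
    scores.set_shape([batch_size])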
Example 9
 def generate_detections(self,
                         cls_outputs,
                         box_outputs,
                         indices,
                         classes,
                         image_id,
                         image_scale,
                         level_index,
                         min_score_thresh,
                         max_boxes_to_draw,
                         use_tf=False):
     if use_tf:
         return _generate_detections_tf(cls_outputs,
                                        box_outputs,
                                        self._anchors.boxes,
                                        indices,
                                        classes,
                                        image_id,
                                        image_scale,
                                        level_index,
                                        min_score_thresh=min_score_thresh,
                                        max_boxes_to_draw=max_boxes_to_draw)
     else:
         return tf.py_func(
             _generate_detections,
             [
                 cls_outputs,
                 box_outputs,
                 self._anchors.boxes,
                 indices,
                 classes,
                 image_id,
                 image_scale,
                 self._num_classes,
                 level_index,
                 #image_id, image_scale, self._target_classes, level_index,
             ],
             [tf.float32, tf.float32, tf.float32, tf.float32])
Example 10
def load_scann_searcher(var_name,
                        checkpoint_path,
                        num_neighbors,
                        dimensions_per_block=2,
                        num_leaves=1000,
                        num_leaves_to_search=100,
                        training_sample_size=100000):
    """Load scann searcher from checkpoint."""
    with tf.device("/cpu:0"):
        np_db = tf.train.load_checkpoint(checkpoint_path).get_tensor(var_name)
        init_db = tf.py_func(lambda: np_db, [], tf.float32)
        init_db.set_shape(np_db.shape)
        tf_db = tf.get_local_variable(var_name, initializer=init_db)

        builder = ScannBuilder(db=tf_db,
                               num_neighbors=num_neighbors,
                               distance_measure="dot_product")
        builder = builder.tree(num_leaves=num_leaves,
                               num_leaves_to_search=num_leaves_to_search,
                               training_sample_size=training_sample_size)
        builder = builder.score_ah(dimensions_per_block=dimensions_per_block)
        searcher = builder.create_tf()
    return tf_db, searcher
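
Here `tf.py_func` serves only to feed a large NumPy array into a variable initializer without embedding it in the graph as a constant; the same pattern in isolation (a sketch, assuming TF 1.x; the random array is a stand-in for the checkpoint tensor):

import numpy as np
import tensorflow as tf  # TF 1.x

np_db = np.random.rand(10000, 128).astype(np.float32)  # stand-in data
init_db = tf.py_func(lambda: np_db, [], tf.float32)
init_db.set_shape(np_db.shape)
# The variable is initialized from the py_func output rather than a huge tf.constant.
tf_db = tf.get_local_variable('db', initializer=init_db)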
Example 11
  def create_sampling_ops(self, use_staging):
    """Creates the ops necessary to sample from the replay buffer.

    Creates the transition dictionary containing the sampling tensors.

    Args:
      use_staging: bool; when True, a staging area is used to prefetch
        the next sampling batch.
    """
    with tf.name_scope('sample_replay'):
      with tf.device('/cpu:*'):
        transition_type = self.memory.get_transition_elements()
        transition_tensors = tf.py_func(
            self.memory.sample_transition_batch, [],
            [return_entry.type for return_entry in transition_type],
            name='replay_sample_py_func')
        self._set_transition_shape(transition_tensors, transition_type)
        if use_staging:
          transition_tensors = self._set_up_staging(transition_tensors)
          self._set_transition_shape(transition_tensors, transition_type)

        # Unpack sample transition into member variables.
        self.unpack_transition(transition_tensors, transition_type)
Example 12
def _predict_sequences(frame_predictions, onset_predictions, offset_predictions,
                       velocity_values, hparams):
  """Predict a batch of sequences."""
  def predict_sequence(frame_predictions, onset_predictions, offset_predictions,
                       velocity_values, hparams):
    """Predict a single sequence."""
    if hparams.drums_only:
      sequence_prediction = infer_util.predict_sequence(
          frame_predictions=onset_predictions,
          onset_predictions=onset_predictions,
          offset_predictions=onset_predictions,
          velocity_values=velocity_values,
          min_pitch=constants.MIN_MIDI_PITCH,
          hparams=hparams,
          onsets_only=True)
      for note in sequence_prediction.notes:
        note.is_drum = True
    else:
      sequence_prediction = infer_util.predict_sequence(
          frame_predictions=frame_predictions,
          onset_predictions=onset_predictions,
          offset_predictions=offset_predictions,
          velocity_values=velocity_values,
          min_pitch=constants.MIN_MIDI_PITCH, hparams=hparams)
    return sequence_prediction.SerializeToString()

  sequences = []
  for i in range(frame_predictions.shape[0]):
    sequence = tf.py_func(
        functools.partial(predict_sequence, hparams=hparams),
        inp=[
            frame_predictions[i], onset_predictions[i], offset_predictions[i],
            velocity_values[i],
        ], Tout=tf.string, stateful=False)
    sequence.set_shape([])
    sequences.append(sequence)
  return tf.stack(sequences)
Example 13
    def get_batch(self, batch_size, config, num_unlabeled_per_class=0):
        """Generator producing a single batch of data (meta-train + meta-test)."""
        if num_unlabeled_per_class > 0:
            raise ValueError(
                'Unlabeled samples are currently only supported in '
                'balanced inputs.')
        sup_sample = functools.partial(self._make_supervised_batch,
                                       batch_size=batch_size)
        images, labels, classes = tf.py_func(sup_sample, [],
                                             (tf.float32, tf.int32, tf.int32),
                                             stateful=True)
        some_label = list(self.data.keys())[0]
        # Setting a proper shape for post-processing to work
        images.set_shape([batch_size] + list(self.data[some_label][0].shape))
        images = config.process(images)

        indices = tf.range(start=0, limit=tf.shape(images)[0], dtype=tf.int32)
        shuffled_indices = tf.random.shuffle(indices)

        images = tf.gather(images, shuffled_indices)
        labels = tf.gather(labels, shuffled_indices)
        classes = tf.gather(classes, shuffled_indices)

        return images, labels, classes
Example 14
def parse_production_rule_sequence_batch(features, max_length, grammar):
    """Parses a batch of expressions to sequences of production rules.

  Args:
    features: Dict of tensors. This dict needs to have the key
        'expression_string', whose value is a string tensor with shape
        [batch_size].
    max_length: Integer. The maximum length of the production rule sequence.
    grammar: arithmetic_grammar.Grammar.

  Returns:
    A feature dict. Keys 'expression_sequence' and 'expression_sequence_mask'
    are added to the dict.
    * 'expression_sequence': an int32 tensor with shape
          [batch_size, max_length].
    * 'expression_sequence_mask': a boolean tensor with shape
          [batch_size, max_length].
  """
    def _parse_expressions_to_indices_sequences(expression_strings):
        return grammar.parse_expressions_to_indices_sequences(
            expression_strings=[
                expression_string.decode('utf-8')
                for expression_string in expression_strings
            ],
            max_length=max_length)

    production_rule_sequences = tf.py_func(
        _parse_expressions_to_indices_sequences,
        [features['expression_string']],
        tf.int32,
        name='py_func-parse_production_rule_sequence_batch')
    production_rule_sequences.set_shape(
        (features['expression_string'].shape[0], max_length))
    features['expression_sequence'] = production_rule_sequences
    features['expression_sequence_mask'] = tf.not_equal(
        production_rule_sequences, grammar.padding_rule_index)
    return features
Example 15
 def finalize(self, sess, inputs, head=-1):
     with sess.graph.as_default():
         y_pred = tf.py_func(self.models[head].predict, [inputs], Tout=dtype)
         y_pred = tf.reshape(y_pred, (-1, self.num_outputs))
     return y_pred,
Example 16
def tf_put_text(imgs, texts):
    """Convert helper function to Tensorflow."""
    return tf.py_func(put_text, [imgs, texts], Tout=imgs.dtype)
Example 17
def train():
    ''' Main function for training and simple evaluation. '''
    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \
            heading_class_label_pl, heading_residual_label_pl, \
            size_class_label_pl, size_residual_label_pl = \
                MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT)

            is_training_pl = tf.placeholder(tf.bool, shape=())
            
            # Note the global_step=batch parameter to minimize. 
            # That tells the optimizer to increment the 'batch' parameter
            # for you every time it trains.
            batch = tf.get_variable('batch', [],
                initializer=tf.constant_initializer(0), trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and losses 
            end_points = MODEL.get_model(pointclouds_pl, one_hot_vec_pl,
                is_training_pl, bn_decay=bn_decay)
            loss = MODEL.get_loss(labels_pl, centers_pl,
                heading_class_label_pl, heading_residual_label_pl,
                size_class_label_pl, size_residual_label_pl, end_points)
            tf.summary.scalar('loss', loss)

            losses = tf.get_collection('losses')
            total_loss = tf.add_n(losses, name='total_loss')
            tf.summary.scalar('total_loss', total_loss)

            # Write summaries of bounding box IoU and segmentation accuracies
            iou2ds, iou3ds = tf.py_func(provider.compute_box3d_iou, [\
                end_points['center'], \
                end_points['heading_scores'], end_points['heading_residuals'], \
                end_points['size_scores'], end_points['size_residuals'], \
                centers_pl, \
                heading_class_label_pl, heading_residual_label_pl, \
                size_class_label_pl, size_residual_label_pl], \
                [tf.float32, tf.float32])
            end_points['iou2ds'] = iou2ds 
            end_points['iou3ds'] = iou3ds 
            tf.summary.scalar('iou_2d', tf.reduce_mean(iou2ds))
            tf.summary.scalar('iou_3d', tf.reduce_mean(iou3ds))

            correct = tf.equal(tf.argmax(end_points['mask_logits'], 2),
                tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / \
                float(BATCH_SIZE*NUM_POINT)
            tf.summary.scalar('segmentation accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                    momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(loss, global_step=batch)
            
            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()
        
        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph)

        # Init variables
        if FLAGS.restore_model_path is None:
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            saver.restore(sess, FLAGS.restore_model_path)

        ops = {'pointclouds_pl': pointclouds_pl,
               'one_hot_vec_pl': one_hot_vec_pl,
               'labels_pl': labels_pl,
               'centers_pl': centers_pl,
               'heading_class_label_pl': heading_class_label_pl,
               'heading_residual_label_pl': heading_residual_label_pl,
               'size_class_label_pl': size_class_label_pl,
               'size_residual_label_pl': size_residual_label_pl,
               'is_training_pl': is_training_pl,
               'logits': end_points['mask_logits'],
               'centers_pred': end_points['center'],
               'loss': loss,
               'train_op': train_op,
               'merged': merged,
               'step': batch,
               'end_points': end_points}

        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()
             
            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
Example 18
def py_func_metric(func, inputs, output_dtype=tf.float32):
    res = tf.py_func(func, inputs, [output_dtype], stateful=False)
    res = tf.reshape(res, [])
    return res
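
A hedged usage sketch of `py_func_metric` (the placeholders and the NumPy metric are illustrative):

import numpy as np
import tensorflow as tf  # TF 1.x

labels = tf.placeholder(tf.float32, [None])
predictions = tf.placeholder(tf.float32, [None])

def np_mean_absolute_error(y_true, y_pred):
    # Illustrative NumPy metric; py_func_metric reshapes the result to a scalar.
    return np.mean(np.abs(y_true - y_pred)).astype(np.float32)

mae = py_func_metric(np_mean_absolute_error, [labels, predictions])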
Example 19
    def get_batches(
        self,
        batch_sizes,
        config,
        num_unlabeled_per_class,
    ):
        """Generator producing multiple separate balanced batches of data.

    Arguments:
      batch_sizes: A list of batch sizes for all batches.
      config: Augmentation configuration.
      num_unlabeled_per_class: A list of integers indicating a number of
        "unlabeled" samples per class for each batch.

    Returns:
      A list of (images, labels, classes) tuples produced for each output batch.
    """
        sup_sample = functools.partial(
            self._make_semisupervised_batches,
            num_unlabeled_per_class=num_unlabeled_per_class,
            batch_sizes=batch_sizes)
        # Returned array is [images, ..., labels, ..., images, ..., labels, ...]
        types = [tf.float32] * self.num_labels
        types += [tf.int32] * self.num_labels
        types += [tf.int32] * self.num_labels
        types = types * len(batch_sizes)
        output = tf.py_func(sup_sample, [], types, stateful=True)

        images_labels = []
        some_label = list(self.data.keys())[0]
        offs = 0
        for batch_size in batch_sizes:
            images = output[offs:offs + self.num_labels]
            offs += self.num_labels
            labels = output[offs:offs + self.num_labels]
            offs += self.num_labels
            classes = output[offs:offs + self.num_labels]
            offs += self.num_labels
            # Setting a proper shape for post-processing to work
            samples_per_label = self._labels_per_batch(batch_size)
            for image, num_samples in zip(images, samples_per_label):
                image.set_shape([num_samples] +
                                list(self.data[some_label][0].shape))
            # Processing and combining in batches
            if config.children:
                images = [
                    config.process(image_mat, idx)
                    for idx, image_mat in enumerate(images)
                ]
            else:
                images = [config.process(image_mat) for image_mat in images]
            images_labels.append((tf.concat(images,
                                            axis=0), tf.concat(labels, axis=0),
                                  tf.concat(classes, axis=0)))

        # Shuffling each batch
        output = []
        for images, labels, classes in images_labels:
            indices = tf.range(start=0,
                               limit=tf.shape(images)[0],
                               dtype=tf.int32)
            shuffled_indices = tf.random.shuffle(indices)
            images = tf.gather(images, shuffled_indices)
            labels = tf.gather(labels, shuffled_indices)
            classes = tf.gather(classes, shuffled_indices)
            output.append((images, labels, classes))
        return output
Example 20
 def _slow_tensorflow_op(self):
   """Returns a TensorFlow op that takes approximately 0.1s to complete."""
   def slow_func(v):
     time.sleep(0.1)
     return v
   return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op
Example 21
  def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns metric ops for use in tf.estimator.EstimatorSpec.

    Args:
      eval_dict: A dictionary that holds an image, groundtruth, and detections
        for a batched example. Note that we use only the first example for
        visualization. See eval_util.result_dict_for_batched_example() for a
        convenient method for constructing such a dictionary. The dictionary
        contains
        fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
        fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
          tensor containing the size of the original image.
        fields.InputDataFields.true_image_shape: [batch_size, 3]
          tensor containing the spatial size of the unpadded original image.
        fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
          float32 tensor with groundtruth boxes in range [0.0, 1.0].
        fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
          int64 tensor with 1-indexed groundtruth classes.
        fields.InputDataFields.groundtruth_instance_masks - (optional)
          [batch_size, num_boxes, H, W] int64 tensor with instance masks.
        fields.DetectionResultFields.detection_boxes - [batch_size,
          max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
          1.0].
        fields.DetectionResultFields.detection_classes - [batch_size,
          max_num_boxes] int64 tensor with 1-indexed detection classes.
        fields.DetectionResultFields.detection_scores - [batch_size,
          max_num_boxes] float32 tensor with detection scores.
        fields.DetectionResultFields.detection_masks - (optional) [batch_size,
          max_num_boxes, H, W] float32 tensor of binarized masks.
        fields.DetectionResultFields.detection_keypoints - (optional)
          [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
          keypoints.

    Returns:
      A dictionary of image summary names to tuple of (value_op, update_op). The
      `update_op` is the same for all items in the dictionary, and is
      responsible for saving a single side-by-side image with detections and
      groundtruth. Each `value_op` holds the tf.summary.image string for a given
      image.
    """
    if self._max_examples_to_draw == 0:
      return {}
    images = self.images_from_evaluation_dict(eval_dict)

    def get_images():
      """Returns a list of images, padded to self._max_images_to_draw."""
      images = self._images
      while len(images) < self._max_examples_to_draw:
        images.append(np.array(0, dtype=np.uint8))
      self.clear()
      return images

    def image_summary_or_default_string(summary_name, image):
      """Returns image summaries for non-padded elements."""
      return tf.cond(
          tf.equal(tf.size(tf.shape(image)), 4),
          lambda: tf.summary.image(summary_name, image),
          lambda: tf.constant(''))

    if tf.executing_eagerly():
      update_op = self.add_images([[images[0]]])
      image_tensors = get_images()
    else:
      update_op = tf.py_func(self.add_images, [[images[0]]], [])
      image_tensors = tf.py_func(
          get_images, [], [tf.uint8] * self._max_examples_to_draw)
    eval_metric_ops = {}
    for i, image in enumerate(image_tensors):
      summary_name = self._summary_name_prefix + '/' + str(i)
      value_op = image_summary_or_default_string(summary_name, image)
      eval_metric_ops[summary_name] = (value_op, update_op)
    return eval_metric_ops
Example 22
        def infer_step(result, length):
            """Inference step."""
            def print_info(samples, result, length, new_length):
                tf.logging.info(
                    "length=%s new_length=%s length_diff=%s samples-result=%s",
                    length,
                    new_length,
                    new_length - length,
                    np.array_str(samples[0, -block_size - 1:-1, 0, 0] -
                                 result[0, -block_size:, 0, 0]).replace(
                                     "\n", ""),
                )

            features["targets"] = tf.pad(result,
                                         [[0, 0], [0, 1], [0, 0], [0, 0]])
            samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

            _, top_k_indices = tf.nn.top_k(
                logits[:, :-1, :1, :, :],
                k=self._decode_hparams.guess_and_check_top_k)
            in_top_k = tf.reduce_any(tf.equal(tf.to_int64(top_k_indices),
                                              tf.expand_dims(result, 4)),
                                     axis=4)

            within_epsilon = tf.less_equal(
                tf.abs(result - samples[:, :-1, :1, :]),
                self._decode_hparams.guess_and_check_epsilon)

            if self._decode_hparams.guess_and_check_top_k:
                tf.logging.info("Using guess_and_check_top_k=%s",
                                self._decode_hparams.guess_and_check_top_k)
                correct = in_top_k
            else:
                tf.logging.info("Using guess_and_check_epsilon=%s",
                                self._decode_hparams.guess_and_check_epsilon)
                correct = within_epsilon

            correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
            perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
            for axis in [0, 2, 3]:
                perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

            new_length = tf.reduce_sum(tf.to_int32(
                tf.equal(correct_cumsum, perfect_cumsum)),
                                       axis=1)
            new_length = tf.squeeze(new_length, axis=[0, 1, 2])
            new_length = tf.minimum(new_length, decode_length)

            new_result = tf.concat([
                result[:, :new_length, :, :],
                tf.reshape(samples[:, new_length, :block_size, :],
                           [1, block_size, 1, 1])
            ],
                                   axis=1)

            with tf.control_dependencies([
                    tf.py_func(print_info,
                               [samples, result, length, new_length], [])
            ]):
                new_result = tf.identity(new_result)

            return new_result, new_length
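
Both this example and Example 21 use `tf.py_func` with an empty `Tout` list purely for its side effect (logging, accumulating images) and then gate downstream ops on it; a minimal sketch of that pattern (assuming TF 1.x; the logging function is illustrative):

import tensorflow as tf  # TF 1.x

x = tf.constant([1.0, 2.0, 3.0])

def log_value(v):
    # Side effect only: nothing is returned to the graph.
    tf.logging.info('value=%s', v)

# With Tout=[], tf.py_func produces no output tensors; control_dependencies
# forces the Python call to run before `x` is consumed downstream.
with tf.control_dependencies([tf.py_func(log_value, [x], [])]):
    x = tf.identity(x)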
Example 23
def inputs(config, files, is_training=False, is_testing=False):
    # parameters
    channels = config.in_channels
    threads = config.threads
    threads_py = config.threads_py
    scaling = config.scaling
    if is_training: num_epochs = config.num_epochs
    data_format = config.data_format
    patch_height = config.patch_height
    patch_width = config.patch_width
    batch_size = config.batch_size
    if is_training: buffer_size = config.buffer_size
    epoch_size = len(files)

    # dataset mapping function
    def parse1_func(filename):
        # read data
        dtype = tf.float32
        image = tf.read_file(filename)
        image = tf.image.decode_image(image, channels=channels)
        shape = tf.shape(image)
        height = shape[-3]
        width = shape[-2]
        # pre down-scale for high resolution image
        dscale = 1
        if is_training and config.pre_down:
            '''
            if (width >= 3072 and height >= 1536) or (width >= 1536 and height >= 3072):
                dscale = 3
            elif (width >= 1024 and height >= 512) or (width >= 512 and height >= 1024):
                dscale = 2
            '''
            def c_t(const1, const2, true_fn, false_fn):
                return tf.cond(tf.logical_or(
                    tf.logical_and(
                        tf.greater_equal(width, const1), tf.greater_equal(height, const2)
                    ),
                    tf.logical_and(
                        tf.greater_equal(width, const2), tf.greater_equal(height, const1)
                    )
                ), true_fn, false_fn)
            dscale = c_t(3072, 1536, lambda: 3,
                lambda: c_t(1024, 512, lambda: 2, lambda: 1)
            )
        elif is_testing and config.pre_down:
            '''
            if (width >= 3072 and height >= 3072):
                dscale = 4
            elif (width >= 2048 and height >= 2048):
                dscale = 3
            elif (width >= 1024 and height >= 1024):
                dscale = 2
            '''
            def c_t(const1, true_fn, false_fn):
                return tf.cond(tf.logical_and(
                    tf.greater_equal(width, const1), tf.greater_equal(height, const1)
                ), true_fn, false_fn)
            dscale = c_t(3072, lambda: 4,
                lambda: c_t(2048, lambda: 3,
                    lambda: c_t(1024, lambda: 2, lambda: 1)
                )
            )
        # padding
        cropped_height = patch_height * dscale
        cropped_width = patch_width * dscale
        '''
        if cropped_height > height or cropped_width > width:
            pad_height = cropped_height - height
            pad_width = cropped_width - width
            if pad_height > 0:
                pad_height = [pad_height // 2, pad_height - pad_height // 2]
                height = cropped_height
            else:
                pad_height = [0, 0]
            if pad_width > 0:
                pad_width = [pad_width // 2, pad_width - pad_width // 2]
                width = cropped_width
            else:
                pad_width = [0, 0]
            block = tf.pad(image, [pad_height, pad_width, [0, 0]], mode='REFLECT')
        else:
            block = image
        '''
        cond_height = tf.greater(cropped_height, height)
        cond_width = tf.greater(cropped_width, width)
        def c_f1():
            def _1():
                ph = cropped_height - height
                return [ph // 2, ph - ph // 2]
            pad_height = tf.cond(cond_height, _1, lambda: [0, 0])
            def _2():
                pw = cropped_width - width
                return [pw // 2, pw - pw // 2]
            pad_width = tf.cond(cond_width, _2, lambda: [0, 0])
            return tf.pad(image, [pad_height, pad_width, [0, 0]], mode='REFLECT')
        block = tf.cond(tf.logical_or(cond_height, cond_width), c_f1, lambda: image)
        height = tf.maximum(cropped_height, height)
        width = tf.maximum(cropped_width, width)
        # cropping
        if is_training:
            block = tf.random_crop(block, [cropped_height, cropped_width, channels])
            block = tf.image.random_flip_up_down(block)
            block = tf.image.random_flip_left_right(block)
        elif is_testing:
            offset_height = (height - cropped_height) // 2
            offset_width = (width - cropped_width) // 2
            block = tf.image.crop_to_bounding_box(block, offset_height, offset_width,
                                                  cropped_height, cropped_width)
        # convert dtype
        block = tf.image.convert_image_dtype(block, dtype, saturate=False)
        # random color augmentation
        if is_training and config.color_augmentation > 0:
            block = tf.image.random_saturation(block, 1 - config.color_augmentation, 1 + config.color_augmentation)
            block = tf.image.random_brightness(block, config.color_augmentation)
            block = tf.image.random_contrast(block, 1 - config.color_augmentation, 1 + config.color_augmentation)
        # data format conversion
        block.set_shape([None, None, channels])
        if data_format == 'NCHW':
            block = tf.transpose(block, (2, 0, 1))
        # return
        return block
    
    # tf.py_func processing using vapoursynth, numpy, etc.
    import threading
    import vapoursynth as vs
    from scipy import ndimage
    
    def eval_random_select(n, clips):
        rand_idx = np.random.randint(0, len(clips))
        return clips[rand_idx]
    
    def SigmoidInverse(clip, thr=0.5, cont=6.5, epsilon=1e-6):
        assert clip.format.sample_type == vs.FLOAT
        x0 = 1 / (1 + np.exp(cont * thr))
        x1 = 1 / (1 + np.exp(cont * (thr - 1)))
        # thr - log(max(1 / max(x * (x1 - x0) + x0, epsilon) - 1, epsilon)) / cont
        expr = '{thr} 1 x {x1_x0} * {x0} + {epsilon} max / 1 - {epsilon} max log {cont_rec} * -'.format(thr=thr, cont_rec=1 / cont, epsilon=epsilon, x0=x0, x1_x0=x1 - x0)
        return clip.std.Expr(expr)
    
    def SigmoidDirect(clip, thr=0.5, cont=6.5):
        assert clip.format.sample_type == vs.FLOAT
        x0 = 1 / (1 + np.exp(cont * thr))
        x1 = 1 / (1 + np.exp(cont * (thr - 1)))
        # (1 / (1 + exp(cont * (thr - x))) - x0) / (x1 - x0)
        expr = '1 1 {cont} {thr} x - * exp + / {x0} - {x1_x0_rec} *'.format(thr=thr, cont=cont, x0=x0, x1_x0_rec=1 / (x1 - x0))
        return clip.std.Expr(expr)
    
    _lock = threading.Lock()
    _index_ref = [0]
    _src_ref = [None for _ in range(epoch_size)]
    core = vs.get_core(threads=1 if is_testing else threads_py)
    core.max_cache_size = 8000
    _dscales = list(range(1, 5)) if config.pre_down else [1]
    _src_blk = [core.std.BlankClip(None, patch_width * s, patch_height * s,
                                   format=vs.RGBS, length=epoch_size)
                for s in _dscales]
    _dst_blk = core.std.BlankClip(None, patch_width // scaling, patch_height // scaling,
                                 format=vs.RGBS, length=epoch_size)
    
    def src_frame_func(n, f):
        f_out = f.copy()
        planes = f_out.format.num_planes
        # output
        for p in range(planes):
            f_arr = np.array(f_out.get_write_array(p), copy=False)
            np.copyto(f_arr, _src_ref[n][p, :, :] if data_format == 'NCHW' else _src_ref[n][:, :, p])
        # set frame properties
        f_out.props['_Primaries'] = 1 # BT.709
        f_out.props['_Transfer'] = 1 # BT.709
        return f_out
    _srcs = [s.std.ModifyFrame(s, src_frame_func) for s in _src_blk]
    _srcs_linear = [s.resize.Bicubic(transfer_s='linear') for s in _srcs]
    
    def src_down_func(clip):
        dw = patch_width
        dh = patch_height
        if clip.width != dw or clip.height != dh:
            clip = SigmoidInverse(clip)
            clip = clip.resize.Bicubic(dw, dh, filter_param_a=0, filter_param_b=0.5)
            clip = SigmoidDirect(clip)
        return clip
    if config.pre_down:
        _srcs_linear = [src_down_func(s) for s in _srcs_linear]
        _srcs = _srcs[0:1] + [s.resize.Bicubic(transfer_s='709')
                                   for s in _srcs_linear[1:]]
    
    def src_select_eval(n):
        # select source
        shape = _src_ref[n].shape
        sh = shape[-2 if data_format == 'NCHW' else -3]
        dscale = sh // patch_height
        # downscale if needed
        clip = _srcs[dscale - 1]
        return clip
    if config.pre_down:
        _src = _src_blk[0].std.FrameEval(src_select_eval)
    else:
        _src = _srcs[0]
    
    def resize_set_func(clip, convert_linear=False):
        # disable resize set when scaling=1
        if scaling == 1:
            return clip
        # parameters
        dw = int(patch_width / scaling + 0.5)
        dh = int(patch_height / scaling + 0.5)
        rets = {}
        # resizers
        rets['bilinear'] = clip.resize.Bilinear(dw, dh)
        rets['spline16'] = clip.resize.Spline16(dw, dh)
        rets['spline36'] = clip.resize.Spline36(dw, dh)
        for taps in range(2, 12):
            rets['lanczos{}'.format(taps)] = clip.resize.Lanczos(dw, dh, filter_param_a=taps)
        # linear to gamma
        if convert_linear:
            for key in rets:
                rets[key] = rets[key].resize.Bicubic(transfer_s='709', transfer_in_s='linear')
        return rets
    
    def resize_eval(n, src, src_linear, resizes, linear_resizes, dscale=None):
        # select source
        if dscale is True:
            shape = _src_ref[n].shape
            sh = shape[-2 if data_format == 'NCHW' else -3]
            dscale = max(1, sh // patch_height)
        if dscale:
            src = src[dscale - 1]
            src_linear = src_linear[dscale - 1]
            resizes = resizes[dscale - 1]
            linear_resizes = linear_resizes[dscale - 1]
        # initialize
        clip = src
        # multiple stages
        max_iter = config.multistage_resize * 2
        if scaling != 1: max_iter += 1
        for _ in range(max_iter):
            downscale = _ % 2 == 0
            # randomly skip multistage resize
            scaling_match = _ % 2 == 0 if scaling == 1 else _ % 2 == 1 # whether the last scaling matches output size
            if _ > 0 and scaling_match and np.random.uniform(0, 1) < 0.7:
                break
            # scaling size
            if scaling == 1:
                scaling1 = 1
                while scaling1 < 4 / 3: # [4 / 3, ~2)
                    scaling1 = 2 ** np.random.normal(0.6, 0.2)
            else:
                scaling1 = scaling
            dw = int(patch_width / scaling1 + 0.5) if downscale else patch_width
            dh = int(patch_height / scaling1 + 0.5) if downscale else patch_height
            use_resize_set = scaling != 1 and _ == 0
            # random number generator
            rand_val = np.random.uniform(-1, 1) if config.random_resizer == 0 else config.random_resizer
            abs_rand = np.abs(rand_val)
            # random gamma-to-linear
            if _ == 0:
                clip = src_linear if rand_val < 0 else src
                resizes = linear_resizes if rand_val < 0 else resizes
            # random resizers
            if abs_rand < (0.05 if downscale else 0.05):
                clip = resizes['bilinear'] if use_resize_set else clip.resize.Bilinear(dw, dh)
            elif abs_rand < (0.10 if downscale else 0.10):
                clip = resizes['spline16'] if use_resize_set else clip.resize.Spline16(dw, dh)
            elif abs_rand < (0.15 if downscale else 0.15):
                clip = resizes['spline36'] if use_resize_set else clip.resize.Spline36(dw, dh)
            elif abs_rand < (0.25 if downscale else 0.40): # Lanczos taps=[2, 12)
                taps = int(np.clip(np.random.exponential(2) + 2, 2, 11))
                clip = resizes['lanczos{}'.format(taps)] if use_resize_set else clip.resize.Lanczos(dw, dh, filter_param_a=taps)
            elif abs_rand < (0.50 if downscale else 0.50): # Catmull-Rom
                b = 0 if config.random_resizer == 0.4 else np.random.normal(0, 1/6)
                c = (1 - b) * 0.5
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
            elif abs_rand < (0.60 if downscale else 0.60): # Mitchell-Netravali (standard Bicubic)
                b = 1/3 if config.random_resizer == 0.6 else np.random.normal(1/3, 1/6)
                c = (1 - b) * 0.5
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
            elif abs_rand < (0.80 if downscale else 0.70): # sharp Bicubic
                b = -0.5 if config.random_resizer == 0.7 else np.random.normal(-0.5, 0.25)
                c = b * -0.5
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
            elif abs_rand < (0.85 if downscale else 0.80): # soft Bicubic
                b = 0.75 if config.random_resizer == 0.8 else np.random.normal(0.75, 0.25)
                c = 1 - b
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
            elif abs_rand < (1.00 if downscale else 0.90): # arbitrary Bicubic
                b = np.random.normal(0, 0.5)
                c = np.random.normal(0.25, 0.25)
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
            elif abs_rand < (1.00 if downscale else 1.00): # Bicubic with haloing & aliasing
                b = np.random.normal(0, 1) # amount of haloing
                c = -1 # when c is around b * 0.8, aliasing is minimum
                if b >= 0: # with aliasing
                    b = 1 + b
                    while c < 0 or c > b * 1.2:
                        c = np.random.normal(b * 0.4, b * 0.2)
                else: # without aliasing
                    b = 1 - b
                    while c < 0 or c > b * 1.2:
                        c = np.random.normal(b * 0.8, b * 0.2)
                b = -b
                clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c)
        # return
        return clip
    
    _resizes = [resize_set_func(s, convert_linear=False) for s in _srcs]
    _linear_resizes = [resize_set_func(s, convert_linear=config.multistage_resize == 0) for s in _srcs_linear]
    _dst = _dst_blk.std.FrameEval(lambda n: resize_eval(n, _srcs, _srcs_linear, _resizes, _linear_resizes, dscale=True))
    _dst = _dst.resize.Bicubic(transfer_s='709') # convert to BT.709 transfer
    
    # chroma subsampling
    def chroma_subsampling(src):
        YUV420PS = core.register_format(vs.YUV, vs.FLOAT, 32, 1, 1)
        src420 = src.resize.Bicubic(format=YUV420PS, matrix_s='709', filter_param_a=0, filter_param_b=0.5)
        clips = [src420.resize.Bilinear(format=vs.RGBS, matrix_in_s='709'),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=1.0, filter_param_b=0.0),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=0.5, filter_param_b=0.5),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=1 / 3, filter_param_b=1 / 3),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=0, filter_param_b=0.5),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-0.5, filter_param_b=0.25),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-1, filter_param_b=0.3),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-1, filter_param_b=0.8),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-2, filter_param_b=0.6),
                src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-2, filter_param_b=1.6)]
        clips += [src] * 6
        clip = src.std.FrameEval(lambda n: eval_random_select(n, clips))
        return clip
    _dst = chroma_subsampling(_dst)

    # parser
    def parse2_pyfunc(label):
        channel_index = -3 if data_format == 'NCHW' else -1
        dscale = label.shape[-2 if data_format == 'NCHW' else -3] // patch_height
        # safely acquire and increase shared index
        _lock.acquire()
        index = _index_ref[0]
        _index_ref[0] = (index + 1) % epoch_size
        _lock.release()
        # processing using vs
        _src_ref[index] = label
        if config.pre_down and dscale > 1: f_src = _src.get_frame(index)
        f_dst = _dst.get_frame(index)
        _src_ref[index] = None
        # vs.VideoFrame to np.ndarray
        if config.pre_down and dscale > 1:
            label = []
            planes = f_src.format.num_planes
            for p in range(planes):
                f_arr = np.array(f_src.get_read_array(p), copy=False)
                label.append(f_arr)
            label = np.stack(label, axis=channel_index)
        data = []
        planes = f_dst.format.num_planes
        for p in range(planes):
            f_arr = np.array(f_dst.get_read_array(p), copy=False)
            data.append(f_arr)
        data = np.stack(data, axis=channel_index)
        # add Gaussian noise of random scale and random spatial correlation
        def _add_noise(data, noise_scale, noise_corr):
            # noise spatial correlation
            def noise_correlation(noise, corr):
                if corr > 0:
                    sigma = np.random.normal(corr, corr)
                    if sigma > 0.25:
                        sigma = [0, sigma, sigma] if data_format == 'NCHW' else [sigma, sigma, 0]
                        noise = ndimage.gaussian_filter(noise, sigma, truncate=2.0)
                return noise
            if noise_scale <= 0:
                return data
            rand_val = np.random.uniform(0, 1)
            scale = np.random.exponential(noise_scale)
            if rand_val < 0.2 or scale < 0.002: # won't add noise
                return data
            noise_shape = list(data.shape)
            if rand_val < 0.35: # RGB noise
                noise = np.random.normal(0.0, scale, noise_shape).astype(np.float32)
                noise = noise_correlation(noise, noise_corr)
            else: # Y/YUV noise
                noise_shape[channel_index] = 1
                noise_y = np.random.normal(0.0, scale, noise_shape).astype(np.float32)
                noise_y = noise_correlation(noise_y, noise_corr)
                scale_uv = np.random.exponential(noise_scale / 2)
                if rand_val < 0.55 and scale_uv > 0.002: # YUV noise
                    noise_u = np.random.normal(0.0, scale_uv, noise_shape).astype(np.float32)
                    noise_u = noise_correlation(noise_u, noise_corr * 1.5)
                    noise_v = np.random.normal(0.0, scale_uv, noise_shape).astype(np.float32)
                    noise_v = noise_correlation(noise_v, noise_corr * 1.5)
                    rand_val2 = np.random.uniform(0, 1)
                    if rand_val2 < 0.3: # Rec.601
                        Kr = 0.299
                        Kg = 0.587
                        Kb = 0.114
                    elif rand_val2 < 0.9: # Rec.709
                        Kr = 0.2126
                        Kg = 0.7152
                        Kb = 0.0722
                    else: # Rec.2020
                        Kr = 0.2627
                        Kg = 0.6780
                        Kb = 0.0593
                    noise_r = noise_y + ((1 - Kr) / 2) * noise_v
                    noise_b = noise_y + ((1 - Kb) / 2) * noise_u
                    noise_g = (1 / Kg) * noise_y - (Kr / Kg) * noise_r - (Kb / Kg) * noise_b
                    noise = [noise_r, noise_g, noise_b]
                else:
                    noise = [noise_y, noise_y, noise_y]
                noise = np.concatenate(noise, axis=channel_index)
            # adding noise
            return data + noise
        data = _add_noise(data, config.noise_scale, config.noise_corr)
        # return
        return data, label
    
    def parse3_func(data, label):
        # final process
        data = tf.clip_by_value(data, 0.0, 1.0)
        label = tf.clip_by_value(label, 0.0, 1.0)
        # JPEG encoding
        def _jpeg_coding(data, quality_step, random_seed=None):
            if quality_step <= 0:
                return data
            steps = 16
            prob_step = 0.02
            rand_val = tf.random_uniform([], -1, 1, seed=random_seed)
            abs_rand = tf.abs(rand_val)
            def c_f1(data):
                if data_format == 'NCHW':
                    data = tf.transpose(data, (1, 2, 0))
                data = tf.image.convert_image_dtype(data, tf.uint8, saturate=True)
                def _f1(quality, chroma_ds):
                    quality = int(quality + 0.5)
                    return tf.image.encode_jpeg(data, quality=quality, chroma_downsampling=chroma_ds)
                def _cond_recur(abs_rand, count=15, chroma_ds=False, prob=0.0, quality=100.0):
                    prob += prob_step
                    if count <= 0:
                        return _f1(quality, chroma_ds)
                    else:
                        return tf.cond(abs_rand < prob, lambda: _f1(quality, chroma_ds),
                            lambda: _cond_recur(abs_rand, count - 1, chroma_ds, prob, quality - config.jpeg_coding))
                data = tf.cond(rand_val < 0, lambda: _cond_recur(abs_rand, steps - 1, True),
                    lambda: _cond_recur(abs_rand, steps - 1, False))
                data = tf.image.decode_jpeg(data)
                data = tf.image.convert_image_dtype(data, tf.float32, saturate=False)
                if data_format == 'NCHW':
                    data = tf.transpose(data, (2, 0, 1))
                return data
            return tf.cond(rand_val < prob_step * steps, lambda: c_f1(data), lambda: data)
        data = _jpeg_coding(data, config.jpeg_coding, config.random_seed if is_testing else None)
        # return
        return data, label
    
    # Dataset API
    dataset = tf.data.Dataset.from_tensor_slices((files))
    if is_training and buffer_size > 0: dataset = dataset.shuffle(buffer_size)
    dataset = dataset.map(parse1_func, num_parallel_calls=1 if is_testing else threads)
    dataset = dataset.map(lambda label: tuple(tf.py_func(parse2_pyfunc,
                              [label], [tf.float32, tf.float32])),
                          num_parallel_calls=1 if is_testing else threads_py)
    dataset = dataset.map(parse3_func, num_parallel_calls=1 if is_testing else threads)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs if is_training else None)
    dataset = dataset.prefetch(64)
    
    # return iterator
    iterator = dataset.make_one_shot_iterator()
    next_data, next_label = iterator.get_next()
    
    # data shape declaration
    data_shape = [None] * 4
    data_shape[-3 if data_format == 'NCHW' else -1] = channels
    next_data.set_shape(data_shape)
    next_label.set_shape(data_shape)
    
    return next_data, next_label
Example 24
    def add_eval_dict(self, eval_dict):
        """Observes an evaluation result dict for a single example.

    When executing eagerly, once all observations have been observed by this
    method you can use `.evaluate()` to get the final metrics.

    When using `tf.estimator.Estimator` for evaluation this function is used by
    `get_estimator_eval_metric_ops()` to construct the metric update op.

    Args:
      eval_dict: A dictionary that holds tensors for evaluating an object
        detection model, returned from
        eval_util.result_dict_for_single_example().

    Returns:
      None when executing eagerly, or an update_op that can be used to update
      the eval metrics in `tf.estimator.EstimatorSpec`.
    """
        def update_op(image_id_batched, groundtruth_boxes_batched,
                      groundtruth_classes_batched,
                      groundtruth_instance_masks_batched,
                      groundtruth_verified_neg_classes_batched,
                      groundtruth_not_exhaustive_classes_batched,
                      num_gt_boxes_per_image, detection_scores_batched,
                      detection_classes_batched, detection_masks_batched,
                      num_det_boxes_per_image, original_image_spatial_shape):
            """Update op for metrics."""

            for (image_id, groundtruth_boxes, groundtruth_classes,
                 groundtruth_instance_masks, groundtruth_verified_neg_classes,
                 groundtruth_not_exhaustive_classes, num_gt_box,
                 detection_scores, detection_classes, detection_masks,
                 num_det_box, original_image_shape) in zip(
                     image_id_batched, groundtruth_boxes_batched,
                     groundtruth_classes_batched,
                     groundtruth_instance_masks_batched,
                     groundtruth_verified_neg_classes_batched,
                     groundtruth_not_exhaustive_classes_batched,
                     num_gt_boxes_per_image, detection_scores_batched,
                     detection_classes_batched, detection_masks_batched,
                     num_det_boxes_per_image, original_image_spatial_shape):
                self.add_single_ground_truth_image_info(
                    image_id, {
                        input_data_fields.groundtruth_boxes:
                        groundtruth_boxes[:num_gt_box],
                        input_data_fields.groundtruth_classes:
                        groundtruth_classes[:num_gt_box],
                        input_data_fields.groundtruth_instance_masks:
                        groundtruth_instance_masks[:num_gt_box]
                        [:original_image_shape[0], :original_image_shape[1]],
                        input_data_fields.groundtruth_verified_neg_classes:
                        groundtruth_verified_neg_classes,
                        input_data_fields.groundtruth_not_exhaustive_classes:
                        groundtruth_not_exhaustive_classes
                    })
                self.add_single_detected_image_info(
                    image_id, {
                        'detection_scores':
                        detection_scores[:num_det_box],
                        'detection_classes':
                        detection_classes[:num_det_box],
                        'detection_masks':
                        detection_masks[:num_det_box]
                        [:, :original_image_shape[0], :original_image_shape[1]]
                    })

        # Unpack items from the evaluation dictionary.
        input_data_fields = fields.InputDataFields
        detection_fields = fields.DetectionResultFields
        image_id = eval_dict[input_data_fields.key]
        original_image_spatial_shape = eval_dict[
            input_data_fields.original_image_spatial_shape]
        groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes]
        groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
        groundtruth_instance_masks = eval_dict[
            input_data_fields.groundtruth_instance_masks]
        groundtruth_verified_neg_classes = eval_dict[
            input_data_fields.groundtruth_verified_neg_classes]
        groundtruth_not_exhaustive_classes = eval_dict[
            input_data_fields.groundtruth_not_exhaustive_classes]

        num_gt_boxes_per_image = eval_dict.get(
            input_data_fields.num_groundtruth_boxes, None)
        detection_scores = eval_dict[detection_fields.detection_scores]
        detection_classes = eval_dict[detection_fields.detection_classes]
        detection_masks = eval_dict[detection_fields.detection_masks]
        num_det_boxes_per_image = eval_dict.get(
            detection_fields.num_detections, None)

        if not image_id.shape.as_list():
            # Apply a batch dimension to all tensors.
            image_id = tf.expand_dims(image_id, 0)
            groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
            groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
            groundtruth_instance_masks = tf.expand_dims(
                groundtruth_instance_masks, 0)
            groundtruth_verified_neg_classes = tf.expand_dims(
                groundtruth_verified_neg_classes, 0)
            groundtruth_not_exhaustive_classes = tf.expand_dims(
                groundtruth_not_exhaustive_classes, 0)
            detection_scores = tf.expand_dims(detection_scores, 0)
            detection_classes = tf.expand_dims(detection_classes, 0)
            detection_masks = tf.expand_dims(detection_masks, 0)

            if num_gt_boxes_per_image is None:
                num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
            else:
                num_gt_boxes_per_image = tf.expand_dims(
                    num_gt_boxes_per_image, 0)

            if num_det_boxes_per_image is None:
                num_det_boxes_per_image = tf.shape(detection_scores)[1:2]
            else:
                num_det_boxes_per_image = tf.expand_dims(
                    num_det_boxes_per_image, 0)
        else:
            if num_gt_boxes_per_image is None:
                num_gt_boxes_per_image = tf.tile(
                    tf.shape(groundtruth_boxes)[1:2],
                    multiples=tf.shape(groundtruth_boxes)[0:1])
            if num_det_boxes_per_image is None:
                num_det_boxes_per_image = tf.tile(
                    tf.shape(detection_scores)[1:2],
                    multiples=tf.shape(detection_scores)[0:1])

        return tf.py_func(update_op, [
            image_id, groundtruth_boxes, groundtruth_classes,
            groundtruth_instance_masks, groundtruth_verified_neg_classes,
            groundtruth_not_exhaustive_classes, num_gt_boxes_per_image,
            detection_scores, detection_classes, detection_masks,
            num_det_boxes_per_image, original_image_spatial_shape
        ], [])
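
Since Tout is an empty list, the tf.py_func above yields no output tensors; it runs purely for its Python-side bookkeeping and is returned as the update half of an estimator metric. A minimal sketch of how such an update op might be paired with a value op in eval_metric_ops follows; the helper name and the assumption that evaluator.evaluate() returns a dict keyed by metric name are illustrative, not taken from the evaluator itself.

import numpy as np
import tensorflow.compat.v1 as tf

def make_eval_metric_ops(evaluator, eval_dict, metric_name):
    # Sketch only: pair the side-effect update op produced by add_eval_dict
    # (a py_func with Tout=[]) with a value op that reads the accumulated
    # result back out of Python. `metric_name` is assumed to be a key of the
    # dict returned by evaluator.evaluate().
    update_op = evaluator.add_eval_dict(eval_dict)

    def _value_func():
        return np.float32(evaluator.evaluate()[metric_name])

    value_op = tf.py_func(_value_func, [], tf.float32)
    return {metric_name: (value_op, update_op)}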
Esempio n. 25
0
  def generate_detections(self, cls_outputs, box_outputs, indices, classes,
                          image_id, image_scale):
    return tf.py_func(_generate_detections, [
        cls_outputs, box_outputs, self._anchors.boxes, indices, classes,
        image_id, image_scale, self._num_classes
    ], tf.float32)
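
Unlike most of the other snippets, Tout here is a single dtype rather than a list, so tf.py_func returns one tensor (with no static shape) instead of a list of tensors. A quick illustration of the difference, assuming a TF1-style graph (nothing below comes from _generate_detections itself):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def _twice(x):
    return (2.0 * x).astype(np.float32)

x = tf.constant([1.0, 2.0], tf.float32)
single = tf.py_func(_twice, [x], tf.float32)     # a single Tensor
as_list = tf.py_func(_twice, [x], [tf.float32])  # a list holding one Tensor
print(single.shape)      # <unknown>: py_func cannot infer static shapes
print(as_list[0].dtype)  # tf.float32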
Esempio n. 26
0
def model_fn(features, labels, mode, params, config):
  """Build the model function for use in an estimator.

  Args:
    features: The input features for the estimator.
    labels: The labels, unused here.
    mode: Signifies whether it is train or test or predict.
    params: Some hyperparameters as a dictionary.
    config: The RunConfig, unused here.
  Returns:
    EstimatorSpec: A tf.estimator.EstimatorSpec instance.
  """
  del labels, config

  encoder = make_encoder(params["activation"],
                         params["num_topics"],
                         params["layer_sizes"])
  decoder, topics_words = make_decoder(params["num_topics"],
                                       features.shape[1])
  topics_prior = make_prior(params["num_topics"],
                            params["prior_initial_value"])

  alpha = topics_prior.concentration

  topics_posterior = encoder(features)
  topics = topics_posterior.sample(seed=234)
  random_reconstruction = decoder(topics)

  reconstruction = random_reconstruction.log_prob(features)
  tf1.summary.scalar("reconstruction", tf.reduce_mean(reconstruction))

  # Compute the KL-divergence between two Dirichlets analytically.
  # The sampled KL does not work well for "sparse" distributions
  # (see Appendix D of [2]).
  kl = tfd.kl_divergence(topics_posterior, topics_prior)
  tf1.summary.scalar("kl", tf.reduce_mean(kl))

  # Ensure that the KL is non-negative (up to a very small slack).
  # Negative KL can happen due to numerical instability.
  with tf.control_dependencies(
      [tf.debugging.assert_greater(kl, -1e-3, message="kl")]):
    kl = tf.identity(kl)

  elbo = reconstruction - kl
  avg_elbo = tf.reduce_mean(elbo)
  tf1.summary.scalar("elbo", avg_elbo)
  loss = -avg_elbo

  # Perform variational inference by minimizing the -ELBO.
  global_step = tf1.train.get_or_create_global_step()
  optimizer = tf1.train.AdamOptimizer(params["learning_rate"])

  # This implements the "burn-in" for prior parameters (see Appendix D of [2]).
  # For the first prior_burn_in_steps steps they are fixed, and then trained
  # jointly with the other parameters.
  grads_and_vars = optimizer.compute_gradients(loss)
  grads_and_vars_except_prior = [
      x for x in grads_and_vars if x[1] not in topics_prior.variables]

  def train_op_except_prior():
    return optimizer.apply_gradients(
        grads_and_vars_except_prior,
        global_step=global_step)

  def train_op_all():
    return optimizer.apply_gradients(
        grads_and_vars,
        global_step=global_step)

  train_op = tf.cond(
      pred=global_step < params["prior_burn_in_steps"],
      true_fn=train_op_except_prior,
      false_fn=train_op_all)

  # The perplexity is an exponent of the average negative ELBO per word.
  words_per_document = tf.reduce_sum(features, axis=1)
  log_perplexity = -elbo / words_per_document
  tf1.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity)))
  (log_perplexity_tensor,
   log_perplexity_update) = tf1.metrics.mean(log_perplexity)
  perplexity_tensor = tf.exp(log_perplexity_tensor)

  # Obtain the topics summary. Implemented as a py_func for simplicity.
  topics = tf1.py_func(
      functools.partial(get_topics_strings, vocabulary=params["vocabulary"]),
      [topics_words, alpha],
      tf.string,
      stateful=False)
  tf1.summary.text("topics", topics)

  return tf1.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={
          "elbo": tf1.metrics.mean(elbo),
          "reconstruction": tf1.metrics.mean(reconstruction),
          "kl": tf1.metrics.mean(kl),
          "perplexity": (perplexity_tensor, log_perplexity_update),
          "topics": (topics, tf.no_op()),
      },
  )
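
The topics summary works because tf1.py_func can return a tf.string tensor that tf1.summary.text consumes directly. get_topics_strings is not reproduced here, so the stand-in below is a simplified, hypothetical version that only demonstrates the numpy-strings-to-tf.string round trip:

import functools
import numpy as np
import tensorflow.compat.v1 as tf1
tf1.disable_v2_behavior()

def fake_topics_strings(topics_words, alpha, vocabulary):
    # Hypothetical stand-in for get_topics_strings: one formatted line per topic.
    top_words = np.argsort(-topics_words, axis=1)[:, :3]
    return np.array([
        "topic %d (alpha=%.2f): %s" % (
            i, alpha[i], " ".join(vocabulary[j] for j in top_words[i]))
        for i in range(topics_words.shape[0])])

vocabulary = ["apple", "berry", "cherry", "date", "elder"]
topics_words = tf1.constant(np.random.rand(2, 5).astype(np.float32))
alpha = tf1.constant([0.7, 0.3], tf1.float32)
topics = tf1.py_func(
    functools.partial(fake_topics_strings, vocabulary=vocabulary),
    [topics_words, alpha], tf1.string, stateful=False)
tf1.summary.text("topics", topics)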
Esempio n. 27
0
  def build():
    """Builds the Tensorflow graph."""
    inputs, labels, lengths = None, None, None

    if mode in ('train', 'eval'):
      if isinstance(no_event_label, numbers.Number):
        label_shape = []
      else:
        label_shape = [len(no_event_label)]
      inputs, labels, lengths = magenta.common.get_padded_batch(
          sequence_example_file_paths, hparams.batch_size, input_size,
          label_shape=label_shape, shuffle=mode == 'train')

    elif mode == 'generate':
      inputs = tf.placeholder(tf.float32, [hparams.batch_size, None,
                                           input_size])

    if isinstance(encoder_decoder,
                  magenta.music.OneHotIndexEventSequenceEncoderDecoder):
      expanded_inputs = tf.one_hot(
          tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
          encoder_decoder.input_depth)
    else:
      expanded_inputs = inputs

    dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

    if hparams.use_cudnn:
      outputs, initial_state, final_state = make_cudnn(
          expanded_inputs, hparams.rnn_layer_sizes, hparams.batch_size, mode,
          dropout_keep_prob=dropout_keep_prob,
          residual_connections=hparams.residual_connections)

    else:
      cell = make_rnn_cell(
          hparams.rnn_layer_sizes,
          dropout_keep_prob=dropout_keep_prob,
          attn_length=hparams.attn_length,
          residual_connections=hparams.residual_connections)

      initial_state = cell.zero_state(hparams.batch_size, tf.float32)

      outputs, final_state = tf.nn.dynamic_rnn(
          cell, inputs, sequence_length=lengths, initial_state=initial_state,
          swap_memory=True)

    outputs_flat = magenta.common.flatten_maybe_padded_sequences(
        outputs, lengths)
    if isinstance(num_classes, numbers.Number):
      num_logits = num_classes
    else:
      num_logits = sum(num_classes)
    logits_flat = contrib_layers.linear(outputs_flat, num_logits)

    if mode in ('train', 'eval'):
      labels_flat = magenta.common.flatten_maybe_padded_sequences(
          labels, lengths)

      if isinstance(num_classes, numbers.Number):
        softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels_flat, logits=logits_flat)
        predictions_flat = tf.argmax(logits_flat, axis=1)
      else:
        logits_offsets = np.cumsum([0] + num_classes)
        softmax_cross_entropy = []
        predictions = []
        for i in range(len(num_classes)):
          softmax_cross_entropy.append(
              tf.nn.sparse_softmax_cross_entropy_with_logits(
                  labels=labels_flat[:, i],
                  logits=logits_flat[
                      :, logits_offsets[i]:logits_offsets[i + 1]]))
          predictions.append(
              tf.argmax(logits_flat[
                  :, logits_offsets[i]:logits_offsets[i + 1]], axis=1))
        predictions_flat = tf.stack(predictions, 1)

      correct_predictions = tf.to_float(
          tf.equal(labels_flat, predictions_flat))
      event_positions = tf.to_float(tf.not_equal(labels_flat, no_event_label))
      no_event_positions = tf.to_float(tf.equal(labels_flat, no_event_label))

      # Compute the total number of time steps across all sequences in the
      # batch. For some models this will be different from the number of RNN
      # steps.
      def batch_labels_to_num_steps(batch_labels, lengths):
        num_steps = 0
        for labels, length in zip(batch_labels, lengths):
          num_steps += encoder_decoder.labels_to_num_steps(labels[:length])
        return np.float32(num_steps)
      num_steps = tf.py_func(
          batch_labels_to_num_steps, [labels, lengths], tf.float32)

      if mode == 'train':
        loss = tf.reduce_mean(softmax_cross_entropy)
        perplexity = tf.exp(loss)
        accuracy = tf.reduce_mean(correct_predictions)
        event_accuracy = (
            tf.reduce_sum(correct_predictions * event_positions) /
            tf.reduce_sum(event_positions))
        no_event_accuracy = (
            tf.reduce_sum(correct_predictions * no_event_positions) /
            tf.reduce_sum(no_event_positions))

        loss_per_step = tf.reduce_sum(softmax_cross_entropy) / num_steps
        perplexity_per_step = tf.exp(loss_per_step)

        optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)

        train_op = contrib_slim.learning.create_train_op(
            loss, optimizer, clip_gradient_norm=hparams.clip_norm)
        tf.add_to_collection('train_op', train_op)

        vars_to_summarize = {
            'loss': loss,
            'metrics/perplexity': perplexity,
            'metrics/accuracy': accuracy,
            'metrics/event_accuracy': event_accuracy,
            'metrics/no_event_accuracy': no_event_accuracy,
            'metrics/loss_per_step': loss_per_step,
            'metrics/perplexity_per_step': perplexity_per_step,
        }
      elif mode == 'eval':
        vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map({
            'loss':
                tf.metrics.mean(softmax_cross_entropy),
            'metrics/accuracy':
                tf.metrics.accuracy(labels_flat, predictions_flat),
            'metrics/per_class_accuracy':
                tf.metrics.mean_per_class_accuracy(labels_flat,
                                                   predictions_flat,
                                                   num_classes),
            'metrics/event_accuracy':
                tf.metrics.recall(event_positions, correct_predictions),
            'metrics/no_event_accuracy':
                tf.metrics.recall(no_event_positions, correct_predictions),
            'metrics/loss_per_step':
                tf.metrics.mean(
                    tf.reduce_sum(softmax_cross_entropy) / num_steps,
                    weights=num_steps),
        })
        for updates_op in update_ops.values():
          tf.add_to_collection('eval_ops', updates_op)

        # Perplexity is just exp(loss) and doesn't need its own update op.
        vars_to_summarize['metrics/perplexity'] = tf.exp(
            vars_to_summarize['loss'])
        vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
            vars_to_summarize['metrics/loss_per_step'])

      for var_name, var_value in six.iteritems(vars_to_summarize):
        tf.summary.scalar(var_name, var_value)
        tf.add_to_collection(var_name, var_value)

    elif mode == 'generate':
      temperature = tf.placeholder(tf.float32, [])
      if isinstance(num_classes, numbers.Number):
        softmax_flat = tf.nn.softmax(
            tf.div(logits_flat, tf.fill([num_classes], temperature)))
        softmax = tf.reshape(
            softmax_flat, [hparams.batch_size, -1, num_classes])
      else:
        logits_offsets = np.cumsum([0] + num_classes)
        softmax = []
        for i in range(len(num_classes)):
          sm = tf.nn.softmax(
              tf.div(
                  logits_flat[:, logits_offsets[i]:logits_offsets[i + 1]],
                  tf.fill([num_classes[i]], temperature)))
          sm = tf.reshape(sm, [hparams.batch_size, -1, num_classes[i]])
          softmax.append(sm)

      tf.add_to_collection('inputs', inputs)
      tf.add_to_collection('temperature', temperature)
      tf.add_to_collection('softmax', softmax)
      # Flatten state tuples for metagraph compatibility.
      for state in tf_nest.flatten(initial_state):
        tf.add_to_collection('initial_state', state)
      for state in tf_nest.flatten(final_state):
        tf.add_to_collection('final_state', state)
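
One detail worth flagging in the num_steps py_func above: batch_labels_to_num_steps returns np.float32(num_steps) rather than a plain Python number, because the value handed back must match the declared Tout of tf.float32 exactly (a Python float would arrive as float64 and the op would typically reject it). The same gotcha in isolation, with an illustrative counting function:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def count_items(values):
    # Return np.float32 (not a plain Python float, which numpy converts to
    # float64) so the result matches the Tout of tf.float32 declared below.
    return np.float32(len(values))

values = tf.constant([3, 1, 4, 1, 5])
num_items = tf.py_func(count_items, [values], tf.float32)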
Esempio n. 28
0
def load_noteseqs(fp,
                  batch_size,
                  seq_len,
                  max_discrete_times=None,
                  max_discrete_velocities=None,
                  augment_stretch_bounds=None,
                  augment_transpose_bounds=None,
                  randomize_chord_order=False,
                  repeat=False,
                  buffer_size=512):
    """Loads random subsequences from NoteSequences in TFRecords.

    Args:
      fp: Glob pattern (or list of patterns) matching the TFRecord shard
        filepaths.
      batch_size: Number of sequences in batch.
      seq_len: Length of subsequences.
      max_discrete_times: Maximum number of time buckets at 31.25Hz.
      max_discrete_velocities: Maximum number of velocity buckets.
      augment_stretch_bounds: Tuple containing speed ratio range.
      augment_transpose_bounds: Tuple containing semitone augmentation range.
      randomize_chord_order: If True, list notes of chord in random order.
      repeat: If True, continuously loop through records.
      buffer_size: Size of random queue.

    Returns:
      A dict containing the loaded tensor subsequences.

    Raises:
      ValueError: Invalid file format for shard filepaths.
    """

    # Deserializes NoteSequences and extracts numeric tensors
    def _str_to_tensor(note_sequence_str,
                       augment_stretch_bounds=None,
                       augment_transpose_bounds=None):
        """Converts a NoteSequence serialized proto to arrays."""
        note_sequence = music_pb2.NoteSequence.FromString(note_sequence_str)

        note_sequence_ordered = list(note_sequence.notes)

        if randomize_chord_order:
            random.shuffle(note_sequence_ordered)
            note_sequence_ordered = sorted(note_sequence_ordered,
                                           key=lambda n: n.start_time)
        else:
            note_sequence_ordered = sorted(note_sequence_ordered,
                                           key=lambda n:
                                           (n.start_time, n.pitch))

        # Transposition data augmentation
        if augment_transpose_bounds is not None:
            transpose_factor = np.random.randint(*augment_transpose_bounds)

            for note in note_sequence_ordered:
                note.pitch += transpose_factor

        note_sequence_ordered = [
            n for n in note_sequence_ordered
            if (n.pitch >= 21) and (n.pitch <= 108)
        ]

        pitches = np.array([note.pitch for note in note_sequence_ordered])
        velocities = np.array(
            [note.velocity for note in note_sequence_ordered])
        start_times = np.array(
            [note.start_time for note in note_sequence_ordered])
        end_times = np.array([note.end_time for note in note_sequence_ordered])

        # Tempo data augmentation
        if augment_stretch_bounds is not None:
            stretch_factor = np.random.uniform(*augment_stretch_bounds)
            start_times *= stretch_factor
            end_times *= stretch_factor

        if note_sequence_ordered:
            # Delta time start high to indicate free decision
            delta_times = np.concatenate([[100000.],
                                          start_times[1:] - start_times[:-1]])
        else:
            delta_times = np.zeros_like(start_times)

        return note_sequence_str, np.stack(
            [pitches, velocities, delta_times, start_times, end_times],
            axis=1).astype(np.float32)

    # Filter out excessively short examples
    def _filter_short(note_sequence_tensor, seq_len):
        note_sequence_len = tf.shape(note_sequence_tensor)[0]
        return tf.greater_equal(note_sequence_len, seq_len)

    # Take a random crop of a note sequence
    def _random_crop(note_sequence_tensor, seq_len):
        note_sequence_len = tf.shape(note_sequence_tensor)[0]
        start_max = note_sequence_len - seq_len
        start_max = tf.maximum(start_max, 0)

        start = tf.random_uniform([], maxval=start_max + 1, dtype=tf.int32)
        seq = note_sequence_tensor[start:start + seq_len]

        return seq

    # Find sharded filenames
    filenames = tf.gfile.Glob(fp)

    # Create dataset
    dataset = tf.data.TFRecordDataset(filenames)

    # Deserialize protos
    # pylint: disable=g-long-lambda
    dataset = dataset.map(lambda data: tf.py_func(lambda x: _str_to_tensor(
        x, augment_stretch_bounds, augment_transpose_bounds), [data],
                                                  (tf.string, tf.float32),
                                                  stateful=False))
    # pylint: enable=g-long-lambda

    # Filter sequences that are too short
    dataset = dataset.filter(lambda s, n: _filter_short(n, seq_len))

    # Get random crops
    dataset = dataset.map(lambda s, n: (s, _random_crop(n, seq_len)))

    # Shuffle
    if repeat:
        dataset = dataset.shuffle(buffer_size=buffer_size)

    # Make batches
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # Repeat
    if repeat:
        dataset = dataset.repeat()

    # Get tensors
    iterator = dataset.make_one_shot_iterator()
    note_sequence_strs, note_sequence_tensors = iterator.get_next()

    # Set shapes
    note_sequence_strs.set_shape([batch_size])
    note_sequence_tensors.set_shape([batch_size, seq_len, 5])

    # Retrieve tensors
    note_pitches = tf.cast(note_sequence_tensors[:, :, 0] + 1e-4, tf.int32)
    note_velocities = tf.cast(note_sequence_tensors[:, :, 1] + 1e-4, tf.int32)
    note_delta_times = note_sequence_tensors[:, :, 2]
    note_start_times = note_sequence_tensors[:, :, 3]
    note_end_times = note_sequence_tensors[:, :, 4]

    # Onsets and frames model samples at 31.25Hz
    note_delta_times_int = tf.cast(
        tf.round(note_delta_times * 31.25) + 1e-4, tf.int32)

    # Reduce time discretizations to a fixed number of buckets
    if max_discrete_times is not None:
        note_delta_times_int = tf.minimum(note_delta_times_int,
                                          max_discrete_times)

    # Quantize velocities
    if max_discrete_velocities is not None:
        note_velocities = tf.minimum(
            note_velocities / (128 // max_discrete_velocities),
            max_discrete_velocities)

    # Build return dict
    note_tensors = {
        "pb_strs": note_sequence_strs,
        "midi_pitches": note_pitches,
        "velocities": note_velocities,
        "delta_times": note_delta_times,
        "delta_times_int": note_delta_times_int,
        "start_times": note_start_times,
        "end_times": note_end_times
    }

    return note_tensors
Esempio n. 29
0
    def simulate(self, action):
        with tf.name_scope("environment/simulate"):
            actions = tf.concat([tf.expand_dims(action, axis=1)] *
                                self._num_frames,
                                axis=1)
            history = self.history_buffer.get_all_elements()
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                # We only need 1 target frame here, set it.
                hparams_target_frames = self._model.hparams.video_num_target_frames
                self._model.hparams.video_num_target_frames = 1
                model_output = self._model.infer({
                    "inputs":
                    history,
                    "input_action":
                    actions,
                    "reset_internal_states":
                    self._reset_model.read_value()
                })
                self._model.hparams.video_num_target_frames = hparams_target_frames

            observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                             self.observ_dtype)

            reward = tf.to_float(model_output["target_reward"])
            reward = tf.reshape(reward,
                                shape=(self.batch_size, )) + self._min_reward

            if self._intrinsic_reward_scale:
                # Use the model's uncertainty about its prediction as an intrinsic
                # reward. The uncertainty is measured by the log probability of the
                # predicted pixel value.
                if "targets_logits" not in model_output:
                    raise ValueError(
                        "The use of intrinsic rewards requires access to "
                        "the logits. Ensure that model.infer returns "
                        "'targets_logits'")
                uncertainty_reward = compute_uncertainty_reward(
                    model_output["targets_logits"], model_output["targets"])
                uncertainty_reward = tf.minimum(
                    1., self._intrinsic_reward_scale * uncertainty_reward)
                uncertainty_reward = tf.Print(uncertainty_reward,
                                              [uncertainty_reward],
                                              message="uncertainty_reward",
                                              first_n=1,
                                              summarize=8)
                reward += uncertainty_reward

            done = tf.constant(False, tf.bool, shape=(self.batch_size, ))

            with tf.control_dependencies([observ]):
                dump_frame_op = tf.cond(
                    self._video_condition,
                    lambda: tf.py_func(
                        self._video_dump_frame,  # pylint: disable=g-long-lambda
                        [observ, reward],
                        []),
                    tf.no_op)
                with tf.control_dependencies([
                        self._observ.assign(observ),
                        self.history_buffer.move_by_one_element(observ),
                        dump_frame_op
                ]):
                    clear_reset_model_op = tf.assign(self._reset_model,
                                                     tf.constant(0.0))
                    with tf.control_dependencies([clear_reset_model_op]):
                        return tf.identity(reward), tf.identity(done)
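
The frame-dump py_func in simulate() has an empty Tout, so it exists only for its Python side effect and is attached to the graph through tf.cond plus control dependencies. A stripped-down sketch of the same wiring (the sink list and the placeholder condition are illustrative, not part of the original environment):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

frames = []  # Python-side sink for dumped frames (illustrative)

def dump_frame(observ, reward):
    # Side effect only; returns nothing, matching Tout=[].
    frames.append((observ.copy(), reward.copy()))

video_condition = tf.placeholder(tf.bool, [])
observ = tf.zeros([1, 4, 4, 3], tf.float32)
reward = tf.zeros([1], tf.float32)

dump_frame_op = tf.cond(
    video_condition,
    lambda: tf.py_func(dump_frame, [observ, reward], []),
    tf.no_op)
with tf.control_dependencies([dump_frame_op]):
    reward_out = tf.identity(reward)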
Esempio n. 30
0
def calculate_metrics(frame_probs,
                      onset_probs,
                      frame_predictions,
                      onset_predictions,
                      offset_predictions,
                      velocity_values,
                      sequence_label,
                      frame_labels,
                      sequence_id,
                      hparams,
                      min_pitch,
                      max_pitch,
                      onsets_only=False,
                      pitch_map=None):
    """Calculate metrics for a single example."""

    def add_metrics(precision, recall, f1, prefix):
        """Create and return a dict of metrics."""
        metrics = {
            prefix + '_precision': precision,
            prefix + '_recall': recall,
            prefix + '_f1_score': f1,
        }
        return metrics

    def make_metrics(note_precision,
                     note_recall,
                     note_f1,
                     note_with_velocity_precision,
                     note_with_velocity_recall,
                     note_with_velocity_f1,
                     note_with_offsets_precision,
                     note_with_offsets_recall,
                     note_with_offsets_f1,
                     note_with_offsets_velocity_precision,
                     note_with_offsets_velocity_recall,
                     note_with_offsets_velocity_f1,
                     processed_frame_predictions,
                     frame_labels,
                     onsets_only=False,
                     prefix=''):
        """Create a dict of onset, offset, frame and velocity metrics."""
        metrics = add_metrics(note_precision, note_recall, note_f1,
                              '_'.join(x for x in [prefix, 'note'] if x))
        metrics.update(
            add_metrics(note_with_velocity_precision, note_with_velocity_recall,
                        note_with_velocity_f1,
                        '_'.join(x for x in [prefix, 'note_with_velocity'] if x)))
        if not onsets_only:
            metrics.update(
                add_metrics(note_with_offsets_precision, note_with_offsets_recall,
                            note_with_offsets_f1,
                            '_'.join(x for x in [prefix, 'note_with_offsets'] if x)))
            metrics.update(
                add_metrics(
                    note_with_offsets_velocity_precision,
                    note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
                    '_'.join(x for x in [prefix, 'note_with_offsets_velocity'] if x)))
            frame_metrics = calculate_frame_metrics(
                frame_labels=frame_labels,
                frame_predictions=processed_frame_predictions)
            metrics.update(
                add_metrics(frame_metrics['precision'],
                            frame_metrics['recall'],
                            frame_metrics['f1_score'],
                            '_'.join(x for x in [prefix, 'frame'] if x)))
            metrics['frame_accuracy'] = frame_metrics['accuracy']
            metrics['frame_accuracy_without_true_negatives'] = frame_metrics[
                'accuracy_without_true_negatives']
        return metrics

    (note_precision, note_recall, note_f1, note_with_velocity_precision,
     note_with_velocity_recall, note_with_velocity_f1,
     note_with_offsets_precision, note_with_offsets_recall, note_with_offsets_f1,
     note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1, processed_frame_predictions) = tf.py_func(
        functools.partial(
            _calculate_metrics_py,
            hparams=hparams,
            min_pitch=min_pitch,
            max_pitch=max_pitch,
            onsets_only=onsets_only),
        inp=[
            frame_probs, onset_probs, frame_predictions, onset_predictions,
            offset_predictions, velocity_values, sequence_label, frame_labels,
            sequence_id
        ],
        Tout=([tf.float64] * 12) + [tf.float32],
        stateful=False)
    metrics = make_metrics(
        note_precision,
        note_recall,
        note_f1,
        note_with_velocity_precision,
        note_with_velocity_recall,
        note_with_velocity_f1,
        note_with_offsets_precision,
        note_with_offsets_recall,
        note_with_offsets_f1,
        note_with_offsets_velocity_precision,
        note_with_offsets_velocity_recall,
        note_with_offsets_velocity_f1,
        processed_frame_predictions,
        frame_labels,
        onsets_only=onsets_only)

    if pitch_map:
        for pitch, name in pitch_map.items():
            (note_precision, note_recall, note_f1, note_with_velocity_precision,
             note_with_velocity_recall, note_with_velocity_f1,
             note_with_offsets_precision, note_with_offsets_recall,
             note_with_offsets_f1, note_with_offsets_velocity_precision,
             note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
             processed_frame_predictions) = tf.py_func(
                functools.partial(
                    _calculate_metrics_py,
                    hparams=hparams,
                    min_pitch=min_pitch,
                    max_pitch=max_pitch,
                    onsets_only=onsets_only,
                    restrict_to_pitch=pitch),
                inp=[
                    frame_probs, onset_probs, frame_predictions, onset_predictions,
                    offset_predictions, velocity_values, sequence_label,
                    frame_labels, sequence_id + name
                ],
                Tout=([tf.float64] * 12) + [tf.float32],
                stateful=False)
            metrics.update(
                make_metrics(
                    note_precision,
                    note_recall,
                    note_f1,
                    note_with_velocity_precision,
                    note_with_velocity_recall,
                    note_with_velocity_f1,
                    note_with_offsets_precision,
                    note_with_offsets_recall,
                    note_with_offsets_f1,
                    note_with_offsets_velocity_precision,
                    note_with_offsets_velocity_recall,
                    note_with_offsets_velocity_f1,
                    processed_frame_predictions,
                    frame_labels,
                    onsets_only=onsets_only,
                    prefix='pitch/' + name))
    return metrics
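
functools.partial is used in both py_func calls above so that only genuine tensors travel through inp, while Python-level configuration (hparams, pitch bounds, onsets_only, restrict_to_pitch) is bound ahead of time. The same idiom in miniature, with an illustrative metric function rather than the real _calculate_metrics_py:

import functools
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def _scaled_error_py(predictions, labels, scale=1.0):
    # Illustrative stand-in: `scale` is Python-side configuration, bound with
    # functools.partial instead of being passed through the inp tensor list.
    return np.float64(scale * np.abs(predictions - labels).mean())

predictions = tf.constant([0.2, 0.8], tf.float64)
labels = tf.constant([0.0, 1.0], tf.float64)
scaled_error = tf.py_func(
    functools.partial(_scaled_error_py, scale=100.0),
    inp=[predictions, labels],
    Tout=tf.float64,
    stateful=False)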