def shaped_py_func(func, inputs, types, shapes, stateful=True, name=None):
  """Wrapper around tf.py_func that adds static shape information to the output.

  Args:
    func: Python function to call.
    inputs: List of input tensors.
    types: List of output tensor types.
    shapes: List of output tensor shapes.
    stateful: Whether or not the python function is stateful.
    name: Name of the op.

  Returns:
    output_tensors: List of output tensors.
  """
  output_tensors = tf.py_func(
      func=func, inp=inputs, Tout=types, stateful=stateful, name=name)
  for t, s in zip(output_tensors, shapes):
    t.set_shape(s)
  return output_tensors
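# Hedged usage sketch (not from the original source): the `scale` helper and
# the TF1-style placeholder below are assumptions, used only to show how the
# wrapper restores the static shapes that a bare tf.py_func would drop.
import numpy as np
import tensorflow.compat.v1 as tf

def scale(x):
  # Hypothetical NumPy function used only for illustration.
  return (x * 2.0).astype(np.float32)

x = tf.placeholder(tf.float32, shape=[4, 3])
y, = shaped_py_func(scale, inputs=[x], types=[tf.float32], shapes=[[4, 3]])
print(y.shape)  # (4, 3); a bare tf.py_func would report an unknown shape.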
def tf_put_text(imgs, texts, text_size=1, text_pos=(0, 30),
                text_color=(0, 0, 1)):
  """Adds text to an image tensor."""

  def _put_text(imgs, texts):
    """Python function that renders text onto an image batch."""
    result = np.empty_like(imgs)
    for i in range(imgs.shape[0]):
      text = texts[i]
      if isinstance(text, bytes):
        text = six.ensure_text(text)
      # You may need to adjust the text size and position.
      # If your images are in the [0, 255] range, replace (0, 0, 1) with
      # (0, 0, 255).
      result[i, :, :, :] = cv2.putText(imgs[i, :, :, :], str(text), text_pos,
                                       cv2.FONT_HERSHEY_COMPLEX, text_size,
                                       text_color, 1)
    return result

  return tf.py_func(_put_text, [imgs, texts], Tout=imgs.dtype)
def reset(self, indices=None):
  """Reset the batch of environments.

  Args:
    indices: The batch indices of the environments to reset; defaults to all.

  Returns:
    Batch tensor of the new observations.
  """
  if indices is None:
    indices = tf.range(len(self._batch_env))
  observ_dtype = self._parse_dtype(self._batch_env.observation_space)
  observ = tf.py_func(
      self._batch_env.reset, [indices], observ_dtype, name='reset')
  observ = tf.check_numerics(observ, 'observ')
  reward = tf.zeros_like(indices, tf.float32)
  done = tf.zeros_like(indices, tf.bool)
  with tf.control_dependencies([
      tf.scatter_update(self._observ, indices, observ),
      tf.scatter_update(self._reward, indices, reward),
      tf.scatter_update(self._done, indices, done)]):
    return tf.identity(observ)
def simulate(self, action):
  """Step the batch of environments.

  The results of the step can be accessed from the variables defined below.

  Args:
    action: Tensor holding the batch of actions to apply.

  Returns:
    Operation.
  """
  with tf.name_scope('environment/simulate'):
    if action.dtype in (tf.float16, tf.float32, tf.float64):
      action = tf.check_numerics(action, 'action')
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    observ, reward, done = tf.py_func(
        lambda a: self._batch_env.step(a)[:3], [action],
        [observ_dtype, tf.float32, tf.bool], name='step')
    observ = tf.check_numerics(observ, 'observ')
    reward = tf.check_numerics(reward, 'reward')
    return tf.group(
        self._observ.assign(observ),
        self._action.assign(action),
        self._reward.assign(reward),
        self._done.assign(done))
def compute_gradients(self, loss, var_list, **kwargs):
  grads_and_vars = tf.train.AdamOptimizer.compute_gradients(
      self, loss, var_list, **kwargs)
  grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
  flat_grad = tf.concat(
      [tf.reshape(g, (-1,)) for g, v in grads_and_vars], axis=0)
  shapes = [v.shape.as_list() for g, v in grads_and_vars]
  sizes = [int(np.prod(s)) for s in shapes]

  num_tasks = self.comm.Get_size()
  buf = np.zeros(sum(sizes), np.float32)

  def _collect_grads(flat_grad):
    self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
    np.divide(buf, float(num_tasks), out=buf)
    return buf

  avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
  avg_flat_grad.set_shape(flat_grad.shape)
  avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
  avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
                        for g, (_, v) in zip(avg_grads, grads_and_vars)]
  return avg_grads_and_vars
def generate_detections(self,
                        cls_outputs,
                        box_outputs,
                        indices,
                        classes,
                        image_id,
                        image_scale,
                        image_size=None,
                        min_score_thresh=MIN_SCORE_THRESH,
                        max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
                        disable_pyfun=None,
                        nms_configs=None):
  """Generate detections based on class and box predictions."""
  if disable_pyfun:
    return _generate_detections_tf(
        cls_outputs,
        box_outputs,
        self._anchors.boxes,
        indices,
        classes,
        image_id,
        image_scale,
        image_size,
        min_score_thresh=min_score_thresh,
        max_boxes_to_draw=max_boxes_to_draw)
  else:
    logging.info('nms_configs=%s', nms_configs)
    return tf.py_func(
        functools.partial(_generate_detections, nms_configs=nms_configs), [
            cls_outputs,
            box_outputs,
            self._anchors.boxes,
            indices,
            classes,
            image_id,
            image_scale,
            self._num_classes,
            max_boxes_to_draw,
        ], tf.float32)
def multiplicative_inverse(a, n):
  """Multiplicative inverse of a modulo n.

  Args:
    a: Tensor of shape [..., vocab_size]. It denotes an integer in the one-hot
      space.
    n: int Tensor of shape [...].

  Returns:
    Tensor of same shape and dtype as a.
  """
  a = tf.convert_to_tensor(a)
  n = tf.convert_to_tensor(n)
  vocab_size = a.shape[-1]
  a_dtype = a.dtype
  sparse_a = tf.argmax(a, axis=-1)
  # TODO(trandustin): Change to tf.py_function.
  sparse_outputs = tf1.py_func(
      py_multiplicative_inverse, [sparse_a, n], tf.int32)
  sparse_outputs.set_shape(sparse_a.shape)
  outputs = tf.one_hot(sparse_outputs, depth=vocab_size, dtype=a_dtype)
  return outputs
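# The NumPy helper `py_multiplicative_inverse` is referenced above but not
# shown. A minimal sketch, assuming it computes an element-wise modular
# inverse with the extended Euclidean algorithm; the real helper may handle
# non-invertible elements differently.
import numpy as np

def py_multiplicative_inverse_sketch(a, n):
  """Element-wise multiplicative inverse of `a` modulo `n` (hypothetical)."""

  def extended_gcd(x, y):
    # Returns (g, s, t) such that s * x + t * y == g == gcd(x, y).
    if x == 0:
      return y, 0, 1
    g, s, t = extended_gcd(y % x, x)
    return g, t - (y // x) * s, s

  def inverse(x, modulus):
    g, s, _ = extended_gcd(int(x), int(modulus))
    if g != 1:
      raise ValueError('{} has no inverse modulo {}'.format(x, modulus))
    return s % modulus

  return np.vectorize(inverse, otypes=[np.int32])(a, n)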
def get_sari(source_ids, prediction_ids, target_ids, max_gram_size=4):
  """Computes the SARI scores from the given source, prediction and targets.

  Args:
    source_ids: A 2D tf.Tensor of size (batch_size, sequence_length)
    prediction_ids: A 2D tf.Tensor of size (batch_size, sequence_length)
    target_ids: A 3D tf.Tensor of size
      (batch_size, number_of_targets, sequence_length)
    max_gram_size: int. largest n-gram size we care about (e.g. 3 for unigrams,
      bigrams, and trigrams)

  Returns:
    A 4-tuple of 1D float Tensors of size (batch_size) for the SARI score and
    the keep, addition and deletion scores.
  """

  def get_sari_numpy(source_ids, prediction_ids, target_ids):
    """Iterate over elements in the batch and call the SARI function."""
    sari_scores = []
    keep_scores = []
    add_scores = []
    deletion_scores = []
    # Iterate over elements in the batch.
    for source_ids_i, prediction_ids_i, target_ids_i in zip(
        source_ids, prediction_ids, target_ids):
      sari, keep, add, deletion = get_sari_score(
          source_ids_i, prediction_ids_i, target_ids_i, max_gram_size,
          BETA_FOR_SARI_DELETION_F_MEASURE)
      sari_scores.append(sari)
      keep_scores.append(keep)
      add_scores.append(add)
      deletion_scores.append(deletion)
    return (np.asarray(sari_scores), np.asarray(keep_scores),
            np.asarray(add_scores), np.asarray(deletion_scores))

  sari, keep, add, deletion = tf.py_func(
      get_sari_numpy, [source_ids, prediction_ids, target_ids],
      [tf.float64, tf.float64, tf.float64, tf.float64])
  return sari, keep, add, deletion
def generate_detections(self,
                        cls_outputs,
                        box_outputs,
                        indices,
                        classes,
                        image_id,
                        image_scale,
                        level_index,
                        min_score_thresh,
                        max_boxes_to_draw,
                        use_tf=False):
  if use_tf:
    return _generate_detections_tf(
        cls_outputs,
        box_outputs,
        self._anchors.boxes,
        indices,
        classes,
        image_id,
        image_scale,
        level_index,
        min_score_thresh=min_score_thresh,
        max_boxes_to_draw=max_boxes_to_draw)
  else:
    return tf.py_func(_generate_detections, [
        cls_outputs,
        box_outputs,
        self._anchors.boxes,
        indices,
        classes,
        image_id,
        image_scale,
        self._num_classes,
        level_index,
        # image_id, image_scale, self._target_classes, level_index,
    ], [tf.float32, tf.float32, tf.float32, tf.float32])
def load_scann_searcher(var_name,
                        checkpoint_path,
                        num_neighbors,
                        dimensions_per_block=2,
                        num_leaves=1000,
                        num_leaves_to_search=100,
                        training_sample_size=100000):
  """Load scann searcher from checkpoint."""
  with tf.device("/cpu:0"):
    np_db = tf.train.load_checkpoint(checkpoint_path).get_tensor(var_name)
    init_db = tf.py_func(lambda: np_db, [], tf.float32)
    init_db.set_shape(np_db.shape)
    tf_db = tf.get_local_variable(var_name, initializer=init_db)
    builder = ScannBuilder(
        db=tf_db, num_neighbors=num_neighbors, distance_measure="dot_product")
    builder = builder.tree(
        num_leaves=num_leaves,
        num_leaves_to_search=num_leaves_to_search,
        training_sample_size=training_sample_size)
    builder = builder.score_ah(dimensions_per_block=dimensions_per_block)
    searcher = builder.create_tf()
  return tf_db, searcher
def create_sampling_ops(self, use_staging):
  """Creates the ops necessary to sample from the replay buffer.

  Creates the transition dictionary containing the sampling tensors.

  Args:
    use_staging: bool, when True it would use a staging area to prefetch the
      next sampling batch.
  """
  with tf.name_scope('sample_replay'):
    with tf.device('/cpu:*'):
      transition_type = self.memory.get_transition_elements()
      transition_tensors = tf.py_func(
          self.memory.sample_transition_batch, [],
          [return_entry.type for return_entry in transition_type],
          name='replay_sample_py_func')
      self._set_transition_shape(transition_tensors, transition_type)
      if use_staging:
        transition_tensors = self._set_up_staging(transition_tensors)
        self._set_transition_shape(transition_tensors, transition_type)
      # Unpack sample transition into member variables.
      self.unpack_transition(transition_tensors, transition_type)
def _predict_sequences(frame_predictions, onset_predictions,
                       offset_predictions, velocity_values, hparams):
  """Predict a batch of sequences."""

  def predict_sequence(frame_predictions, onset_predictions,
                       offset_predictions, velocity_values, hparams):
    """Predict a single sequence."""
    if hparams.drums_only:
      sequence_prediction = infer_util.predict_sequence(
          frame_predictions=onset_predictions,
          onset_predictions=onset_predictions,
          offset_predictions=onset_predictions,
          velocity_values=velocity_values,
          min_pitch=constants.MIN_MIDI_PITCH,
          hparams=hparams,
          onsets_only=True)
      for note in sequence_prediction.notes:
        note.is_drum = True
    else:
      sequence_prediction = infer_util.predict_sequence(
          frame_predictions=frame_predictions,
          onset_predictions=onset_predictions,
          offset_predictions=offset_predictions,
          velocity_values=velocity_values,
          min_pitch=constants.MIN_MIDI_PITCH,
          hparams=hparams)
    return sequence_prediction.SerializeToString()

  sequences = []
  for i in range(frame_predictions.shape[0]):
    sequence = tf.py_func(
        functools.partial(predict_sequence, hparams=hparams),
        inp=[
            frame_predictions[i],
            onset_predictions[i],
            offset_predictions[i],
            velocity_values[i],
        ],
        Tout=tf.string,
        stateful=False)
    sequence.set_shape([])
    sequences.append(sequence)
  return tf.stack(sequences)
def get_batch(self, batch_size, config, num_unlabeled_per_class=0):
  """Generator producing a single batch of data (meta-train + meta-test)."""
  if num_unlabeled_per_class > 0:
    raise ValueError('Unlabeled samples are currently only supported in '
                     'balanced inputs.')
  sup_sample = functools.partial(
      self._make_supervised_batch, batch_size=batch_size)
  images, labels, classes = tf.py_func(
      sup_sample, [], (tf.float32, tf.int32, tf.int32), stateful=True)
  some_label = list(self.data.keys())[0]
  # Setting a proper shape for post-processing to work.
  images.set_shape([batch_size] + list(self.data[some_label][0].shape))
  images = config.process(images)
  indices = tf.range(start=0, limit=tf.shape(images)[0], dtype=tf.int32)
  shuffled_indices = tf.random.shuffle(indices)
  images = tf.gather(images, shuffled_indices)
  labels = tf.gather(labels, shuffled_indices)
  classes = tf.gather(classes, shuffled_indices)
  return images, labels, classes
def parse_production_rule_sequence_batch(features, max_length, grammar):
  """Parses a batch of expressions to sequences of production rules.

  Args:
    features: Dict of tensors. This dict needs to have the key
      'expression_string', whose value is a string tensor with shape
      [batch_size].
    max_length: Integer. The maximum length of the production rule sequence.
    grammar: arithmetic_grammar.Grammar.

  Returns:
    A feature dict. Keys 'expression_sequence' and 'expression_sequence_mask'
    are added to the dict.
    * 'expression_sequence': an int32 tensor with shape
      [batch_size, max_length].
    * 'expression_sequence_mask': a boolean tensor with shape
      [batch_size, max_length].
  """

  def _parse_expressions_to_indices_sequences(expression_strings):
    return grammar.parse_expressions_to_indices_sequences(
        expression_strings=[
            expression_string.decode('utf-8')
            for expression_string in expression_strings
        ],
        max_length=max_length)

  production_rule_sequences = tf.py_func(
      _parse_expressions_to_indices_sequences,
      [features['expression_string']],
      tf.int32,
      name='py_func-parse_production_rule_sequence_batch')
  production_rule_sequences.set_shape(
      (features['expression_string'].shape[0], max_length))
  features['expression_sequence'] = production_rule_sequences
  features['expression_sequence_mask'] = tf.not_equal(
      production_rule_sequences, grammar.padding_rule_index)
  return features
def finalize(self, sess, inputs, head=-1):
  with sess.graph.as_default():
    y_pred = tf.py_func(self.models[head].predict, [inputs], Tout=dtype)
    y_pred = tf.reshape(y_pred, (-1, self.num_outputs))
    return y_pred,
def tf_put_text(imgs, texts):
  """Convert helper function to Tensorflow."""
  return tf.py_func(put_text, [imgs, texts], Tout=imgs.dtype)
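# Hedged usage sketch: the placeholders below are assumptions, and `put_text`
# itself is defined elsewhere in the original project. Because tf.py_func
# loses static shape information, callers typically restore it explicitly.
images = tf.placeholder(tf.uint8, [None, 256, 256, 3])
captions = tf.placeholder(tf.string, [None])
annotated = tf_put_text(images, captions)
annotated.set_shape(images.shape)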
def train(): ''' Main function for training and simple evaluation. ''' with tf.Graph().as_default(): with tf.device('/gpu:'+str(GPU_INDEX)): pointclouds_pl, one_hot_vec_pl, labels_pl, centers_pl, \ heading_class_label_pl, heading_residual_label_pl, \ size_class_label_pl, size_residual_label_pl = \ MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT) is_training_pl = tf.placeholder(tf.bool, shape=()) # Note the global_step=batch parameter to minimize. # That tells the optimizer to increment the 'batch' parameter # for you every time it trains. batch = tf.get_variable('batch', [], initializer=tf.constant_initializer(0), trainable=False) bn_decay = get_bn_decay(batch) tf.summary.scalar('bn_decay', bn_decay) # Get model and losses end_points = MODEL.get_model(pointclouds_pl, one_hot_vec_pl, is_training_pl, bn_decay=bn_decay) loss = MODEL.get_loss(labels_pl, centers_pl, heading_class_label_pl, heading_residual_label_pl, size_class_label_pl, size_residual_label_pl, end_points) tf.summary.scalar('loss', loss) losses = tf.get_collection('losses') total_loss = tf.add_n(losses, name='total_loss') tf.summary.scalar('total_loss', total_loss) # Write summaries of bounding box IoU and segmentation accuracies iou2ds, iou3ds = tf.py_func(provider.compute_box3d_iou, [\ end_points['center'], \ end_points['heading_scores'], end_points['heading_residuals'], \ end_points['size_scores'], end_points['size_residuals'], \ centers_pl, \ heading_class_label_pl, heading_residual_label_pl, \ size_class_label_pl, size_residual_label_pl], \ [tf.float32, tf.float32]) end_points['iou2ds'] = iou2ds end_points['iou3ds'] = iou3ds tf.summary.scalar('iou_2d', tf.reduce_mean(iou2ds)) tf.summary.scalar('iou_3d', tf.reduce_mean(iou3ds)) correct = tf.equal(tf.argmax(end_points['mask_logits'], 2), tf.to_int64(labels_pl)) accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / \ float(BATCH_SIZE*NUM_POINT) tf.summary.scalar('segmentation accuracy', accuracy) # Get training operator learning_rate = get_learning_rate(batch) tf.summary.scalar('learning_rate', learning_rate) if OPTIMIZER == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM) elif OPTIMIZER == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) train_op = optimizer.minimize(loss, global_step=batch) # Add ops to save and restore all the variables. 
saver = tf.train.Saver() # Create a session config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True config.log_device_placement = False sess = tf.Session(config=config) # Add summary writers merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph) test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph) # Init variables if FLAGS.restore_model_path is None: init = tf.global_variables_initializer() sess.run(init) else: saver.restore(sess, FLAGS.restore_model_path) ops = {'pointclouds_pl': pointclouds_pl, 'one_hot_vec_pl': one_hot_vec_pl, 'labels_pl': labels_pl, 'centers_pl': centers_pl, 'heading_class_label_pl': heading_class_label_pl, 'heading_residual_label_pl': heading_residual_label_pl, 'size_class_label_pl': size_class_label_pl, 'size_residual_label_pl': size_residual_label_pl, 'is_training_pl': is_training_pl, 'logits': end_points['mask_logits'], 'centers_pred': end_points['center'], 'loss': loss, 'train_op': train_op, 'merged': merged, 'step': batch, 'end_points': end_points} for epoch in range(MAX_EPOCH): log_string('**** EPOCH %03d ****' % (epoch)) sys.stdout.flush() train_one_epoch(sess, ops, train_writer) eval_one_epoch(sess, ops, test_writer) # Save the variables to disk. if epoch % 10 == 0: save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt")) log_string("Model saved in file: %s" % save_path)
def py_func_metric(func, inputs, output_dtype=tf.float32):
  res = tf.py_func(func, inputs, [output_dtype], stateful=False)
  res = tf.reshape(res, [])
  return res
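# Hedged usage sketch: `_np_accuracy` is a made-up NumPy metric used only to
# illustrate the wrapper; any function returning a scalar works the same way.
import numpy as np

def _np_accuracy(predictions, labels):
  return np.float32(np.mean(predictions == labels))

predictions = tf.constant([1, 0, 1, 1])
labels = tf.constant([1, 1, 1, 0])
accuracy = py_func_metric(_np_accuracy, [predictions, labels])  # scalar float32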
def get_batches(self, batch_sizes, config, num_unlabeled_per_class):
  """Generator producing multiple separate balanced batches of data.

  Arguments:
    batch_sizes: A list of batch sizes for all batches.
    config: Augmentation configuration.
    num_unlabeled_per_class: A list of integers indicating a number of
      "unlabeled" samples per class for each batch.

  Returns:
    A list of (images, labels, classes) tuples produced for each output batch.
  """
  sup_sample = functools.partial(
      self._make_semisupervised_batches,
      num_unlabeled_per_class=num_unlabeled_per_class,
      batch_sizes=batch_sizes)
  # Returned array is [images, ..., labels, ..., images, ..., labels, ...]
  types = [tf.float32] * self.num_labels
  types += [tf.int32] * self.num_labels
  types += [tf.int32] * self.num_labels
  types = types * len(batch_sizes)
  output = tf.py_func(sup_sample, [], types, stateful=True)

  images_labels = []
  some_label = list(self.data.keys())[0]
  offs = 0
  for batch_size in batch_sizes:
    images = output[offs:offs + self.num_labels]
    offs += self.num_labels
    labels = output[offs:offs + self.num_labels]
    offs += self.num_labels
    classes = output[offs:offs + self.num_labels]
    offs += self.num_labels
    # Setting a proper shape for post-processing to work.
    samples_per_label = self._labels_per_batch(batch_size)
    for image, num_samples in zip(images, samples_per_label):
      image.set_shape([num_samples] + list(self.data[some_label][0].shape))
    # Processing and combining in batches.
    if config.children:
      images = [
          config.process(image_mat, idx)
          for idx, image_mat in enumerate(images)
      ]
    else:
      images = [config.process(image_mat) for image_mat in images]
    images_labels.append((tf.concat(images, axis=0),
                          tf.concat(labels, axis=0),
                          tf.concat(classes, axis=0)))

  # Shuffling each batch.
  output = []
  for images, labels, classes in images_labels:
    indices = tf.range(start=0, limit=tf.shape(images)[0], dtype=tf.int32)
    shuffled_indices = tf.random.shuffle(indices)
    images = tf.gather(images, shuffled_indices)
    labels = tf.gather(labels, shuffled_indices)
    classes = tf.gather(classes, shuffled_indices)
    output.append((images, labels, classes))
  return output
def _slow_tensorflow_op(self):
  """Returns a TensorFlow op that takes approximately 0.1s to complete."""

  def slow_func(v):
    time.sleep(0.1)
    return v

  return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op
def get_estimator_eval_metric_ops(self, eval_dict): """Returns metric ops for use in tf.estimator.EstimatorSpec. Args: eval_dict: A dictionary that holds an image, groundtruth, and detections for a batched example. Note that, we use only the first example for visualization. See eval_util.result_dict_for_batched_example() for a convenient method for constructing such a dictionary. The dictionary contains fields.InputDataFields.original_image: [batch_size, H, W, 3] image. fields.InputDataFields.original_image_spatial_shape: [batch_size, 2] tensor containing the size of the original image. fields.InputDataFields.true_image_shape: [batch_size, 3] tensor containing the spatial size of the upadded original image. fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4] float32 tensor with groundtruth boxes in range [0.0, 1.0]. fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes] int64 tensor with 1-indexed groundtruth classes. fields.InputDataFields.groundtruth_instance_masks - (optional) [batch_size, num_boxes, H, W] int64 tensor with instance masks. fields.DetectionResultFields.detection_boxes - [batch_size, max_num_boxes, 4] float32 tensor with detection boxes in range [0.0, 1.0]. fields.DetectionResultFields.detection_classes - [batch_size, max_num_boxes] int64 tensor with 1-indexed detection classes. fields.DetectionResultFields.detection_scores - [batch_size, max_num_boxes] float32 tensor with detection scores. fields.DetectionResultFields.detection_masks - (optional) [batch_size, max_num_boxes, H, W] float32 tensor of binarized masks. fields.DetectionResultFields.detection_keypoints - (optional) [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with keypoints. Returns: A dictionary of image summary names to tuple of (value_op, update_op). The `update_op` is the same for all items in the dictionary, and is responsible for saving a single side-by-side image with detections and groundtruth. Each `value_op` holds the tf.summary.image string for a given image. """ if self._max_examples_to_draw == 0: return {} images = self.images_from_evaluation_dict(eval_dict) def get_images(): """Returns a list of images, padded to self._max_images_to_draw.""" images = self._images while len(images) < self._max_examples_to_draw: images.append(np.array(0, dtype=np.uint8)) self.clear() return images def image_summary_or_default_string(summary_name, image): """Returns image summaries for non-padded elements.""" return tf.cond( tf.equal(tf.size(tf.shape(image)), 4), lambda: tf.summary.image(summary_name, image), lambda: tf.constant('')) if tf.executing_eagerly(): update_op = self.add_images([[images[0]]]) image_tensors = get_images() else: update_op = tf.py_func(self.add_images, [[images[0]]], []) image_tensors = tf.py_func( get_images, [], [tf.uint8] * self._max_examples_to_draw) eval_metric_ops = {} for i, image in enumerate(image_tensors): summary_name = self._summary_name_prefix + '/' + str(i) value_op = image_summary_or_default_string(summary_name, image) eval_metric_ops[summary_name] = (value_op, update_op) return eval_metric_ops
def infer_step(result, length):
  """Inference step."""

  def print_info(samples, result, length, new_length):
    tf.logging.info(
        "length=%s new_length=%s length_diff=%s samples-result=%s",
        length,
        new_length,
        new_length - length,
        np.array_str(samples[0, -block_size - 1:-1, 0, 0] -
                     result[0, -block_size:, 0, 0]).replace("\n", ""),
    )

  features["targets"] = tf.pad(result, [[0, 0], [0, 1], [0, 0], [0, 0]])
  samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

  _, top_k_indices = tf.nn.top_k(
      logits[:, :-1, :1, :, :], k=self._decode_hparams.guess_and_check_top_k)
  in_top_k = tf.reduce_any(
      tf.equal(tf.to_int64(top_k_indices), tf.expand_dims(result, 4)), axis=4)

  within_epsilon = tf.less_equal(
      tf.abs(result - samples[:, :-1, :1, :]),
      self._decode_hparams.guess_and_check_epsilon)

  if self._decode_hparams.guess_and_check_top_k:
    tf.logging.info("Using guess_and_check_top_k=%s",
                    self._decode_hparams.guess_and_check_top_k)
    correct = in_top_k
  else:
    tf.logging.info("Using guess_and_check_epsilon=%s",
                    self._decode_hparams.guess_and_check_epsilon)
    correct = within_epsilon

  correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
  perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
  for axis in [0, 2, 3]:
    perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

  new_length = tf.reduce_sum(
      tf.to_int32(tf.equal(correct_cumsum, perfect_cumsum)), axis=1)
  new_length = tf.squeeze(new_length, axis=[0, 1, 2])
  new_length = tf.minimum(new_length, decode_length)

  new_result = tf.concat([
      result[:, :new_length, :, :],
      tf.reshape(samples[:, new_length, :block_size, :],
                 [1, block_size, 1, 1])
  ], axis=1)

  with tf.control_dependencies(
      [tf.py_func(print_info, [samples, result, length, new_length], [])]):
    new_result = tf.identity(new_result)

  return new_result, new_length
def inputs(config, files, is_training=False, is_testing=False): # parameters channels = config.in_channels threads = config.threads threads_py = config.threads_py scaling = config.scaling if is_training: num_epochs = config.num_epochs data_format = config.data_format patch_height = config.patch_height patch_width = config.patch_width batch_size = config.batch_size if is_training: buffer_size = config.buffer_size epoch_size = len(files) # dataset mapping function def parse1_func(filename): # read data dtype = tf.float32 image = tf.read_file(filename) image = tf.image.decode_image(image, channels=channels) shape = tf.shape(image) height = shape[-3] width = shape[-2] # pre down-scale for high resolution image dscale = 1 if is_training and config.pre_down: ''' if (width >= 3072 and height >= 1536) or (width >= 1536 and height >= 3072): dscale = 3 elif (width >= 1024 and height >= 512) or (width >= 512 and height >= 1024): dscale = 2 ''' def c_t(const1, const2, true_fn, false_fn): return tf.cond(tf.logical_or( tf.logical_and( tf.greater_equal(width, const1), tf.greater_equal(height, const2) ), tf.logical_and( tf.greater_equal(width, const2), tf.greater_equal(height, const1) ) ), true_fn, false_fn) dscale = c_t(3072, 1536, lambda: 3, lambda: c_t(1024, 512, lambda: 2, lambda: 1) ) elif is_testing and config.pre_down: ''' if (width >= 3072 and height >= 3072): dscale = 4 elif (width >= 2048 and height >= 2048): dscale = 3 elif (width >= 1024 and height >= 1024): dscale = 2 ''' def c_t(const1, true_fn, false_fn): return tf.cond(tf.logical_and( tf.greater_equal(width, const1), tf.greater_equal(height, const1) ), true_fn, false_fn) dscale = c_t(3072, lambda: 4, lambda: c_t(2048, lambda: 3, lambda: c_t(1024, lambda: 2, lambda: 1) ) ) # padding cropped_height = patch_height * dscale cropped_width = patch_width * dscale ''' if cropped_height > height or cropped_width > width: pad_height = cropped_height - height pad_width = cropped_width - width if pad_height > 0: pad_height = [pad_height // 2, pad_height - pad_height // 2] height = cropped_height else: pad_height = [0, 0] if pad_width > 0: pad_width = [pad_width // 2, pad_width - pad_width // 2] width = cropped_width else: pad_width = [0, 0] block = tf.pad(image, [pad_height, pad_width, [0, 0]], mode='REFLECT') else: block = image ''' cond_height = tf.greater(cropped_height, height) cond_width = tf.greater(cropped_width, width) def c_f1(): def _1(): ph = cropped_height - height return [ph // 2, ph - ph // 2] pad_height = tf.cond(cond_height, _1, lambda: [0, 0]) def _2(): pw = cropped_width - width return [pw // 2, pw - pw // 2] pad_width = tf.cond(cond_width, _2, lambda: [0, 0]) return tf.pad(image, [pad_height, pad_width, [0, 0]], mode='REFLECT') block = tf.cond(tf.logical_or(cond_height, cond_width), c_f1, lambda: image) height = tf.maximum(cropped_height, height) width = tf.maximum(cropped_width, width) # cropping if is_training: block = tf.random_crop(block, [cropped_height, cropped_width, channels]) block = tf.image.random_flip_up_down(block) block = tf.image.random_flip_left_right(block) elif is_testing: offset_height = (height - cropped_height) // 2 offset_width = (width - cropped_width) // 2 block = tf.image.crop_to_bounding_box(block, offset_height, offset_width, cropped_height, cropped_width) # convert dtype block = tf.image.convert_image_dtype(block, dtype, saturate=False) # random color augmentation if is_training and config.color_augmentation > 0: block = tf.image.random_saturation(block, 1 - config.color_augmentation, 1 + 
config.color_augmentation) block = tf.image.random_brightness(block, config.color_augmentation) block = tf.image.random_contrast(block, 1 - config.color_augmentation, 1 + config.color_augmentation) # data format conversion block.set_shape([None, None, channels]) if data_format == 'NCHW': block = tf.transpose(block, (2, 0, 1)) # return return block # tf.py_func processing using vapoursynth, numpy, etc. import threading import vapoursynth as vs from scipy import ndimage def eval_random_select(n, clips): rand_idx = np.random.randint(0, len(clips)) return clips[rand_idx] def SigmoidInverse(clip, thr=0.5, cont=6.5, epsilon=1e-6): assert clip.format.sample_type == vs.FLOAT x0 = 1 / (1 + np.exp(cont * thr)) x1 = 1 / (1 + np.exp(cont * (thr - 1))) # thr - log(max(1 / max(x * (x1 - x0) + x0, epsilon) - 1, epsilon)) / cont expr = '{thr} 1 x {x1_x0} * {x0} + {epsilon} max / 1 - {epsilon} max log {cont_rec} * -'.format(thr=thr, cont_rec=1 / cont, epsilon=epsilon, x0=x0, x1_x0=x1 - x0) return clip.std.Expr(expr) def SigmoidDirect(clip, thr=0.5, cont=6.5): assert clip.format.sample_type == vs.FLOAT x0 = 1 / (1 + np.exp(cont * thr)) x1 = 1 / (1 + np.exp(cont * (thr - 1))) # (1 / (1 + exp(cont * (thr - x))) - x0) / (x1 - x0) expr = '1 1 {cont} {thr} x - * exp + / {x0} - {x1_x0_rec} *'.format(thr=thr, cont=cont, x0=x0, x1_x0_rec=1 / (x1 - x0)) return clip.std.Expr(expr) _lock = threading.Lock() _index_ref = [0] _src_ref = [None for _ in range(epoch_size)] core = vs.get_core(threads=1 if is_testing else threads_py) core.max_cache_size = 8000 _dscales = list(range(1, 5)) if config.pre_down else [1] _src_blk = [core.std.BlankClip(None, patch_width * s, patch_height * s, format=vs.RGBS, length=epoch_size) for s in _dscales] _dst_blk = core.std.BlankClip(None, patch_width // scaling, patch_height // scaling, format=vs.RGBS, length=epoch_size) def src_frame_func(n, f): f_out = f.copy() planes = f_out.format.num_planes # output for p in range(planes): f_arr = np.array(f_out.get_write_array(p), copy=False) np.copyto(f_arr, _src_ref[n][p, :, :] if data_format == 'NCHW' else _src_ref[n][:, :, p]) # set frame properties f_out.props['_Primaries'] = 1 # BT.709 f_out.props['_Transfer'] = 1 # BT.709 return f_out _srcs = [s.std.ModifyFrame(s, src_frame_func) for s in _src_blk] _srcs_linear = [s.resize.Bicubic(transfer_s='linear') for s in _srcs] def src_down_func(clip): dw = patch_width dh = patch_height if clip.width != dw or clip.height != dh: clip = SigmoidInverse(clip) clip = clip.resize.Bicubic(dw, dh, filter_param_a=0, filter_param_b=0.5) clip = SigmoidDirect(clip) return clip if config.pre_down: _srcs_linear = [src_down_func(s) for s in _srcs_linear] _srcs = _srcs[0:1] + [s.resize.Bicubic(transfer_s='709') for s in _srcs_linear[1:]] def src_select_eval(n): # select source shape = _src_ref[n].shape sh = shape[-2 if data_format == 'NCHW' else -3] dscale = sh // patch_height # downscale if needed clip = _srcs[dscale - 1] return clip if config.pre_down: _src = _src_blk[0].std.FrameEval(src_select_eval) else: _src = _srcs[0] def resize_set_func(clip, convert_linear=False): # disable resize set when scaling=1 if scaling == 1: return clip # parameters dw = int(patch_width / scaling + 0.5) dh = int(patch_height / scaling + 0.5) rets = {} # resizers rets['bilinear'] = clip.resize.Bilinear(dw, dh) rets['spline16'] = clip.resize.Spline16(dw, dh) rets['spline36'] = clip.resize.Spline36(dw, dh) for taps in range(2, 12): rets['lanczos{}'.format(taps)] = clip.resize.Lanczos(dw, dh, filter_param_a=taps) # linear to gamma if 
convert_linear: for key in rets: rets[key] = rets[key].resize.Bicubic(transfer_s='709', transfer_in_s='linear') return rets def resize_eval(n, src, src_linear, resizes, linear_resizes, dscale=None): # select source if dscale is True: shape = _src_ref[n].shape sh = shape[-2 if data_format == 'NCHW' else -3] dscale = max(1, sh // patch_height) if dscale: src = src[dscale - 1] src_linear = src_linear[dscale - 1] resizes = resizes[dscale - 1] linear_resizes = linear_resizes[dscale - 1] # initialize clip = src # multiple stages max_iter = config.multistage_resize * 2 if scaling != 1: max_iter += 1 for _ in range(max_iter): downscale = _ % 2 == 0 # randomly skip multistage resize scaling_match = _ % 2 == 0 if scaling == 1 else _ % 2 == 1 # whether the last scaling matches output size if _ > 0 and scaling_match and np.random.uniform(0, 1) < 0.7: break # scaling size if scaling == 1: scaling1 = 1 while scaling1 < 4 / 3: # [4 / 3, ~2) scaling1 = 2 ** np.random.normal(0.6, 0.2) else: scaling1 = scaling dw = int(patch_width / scaling1 + 0.5) if downscale else patch_width dh = int(patch_height / scaling1 + 0.5) if downscale else patch_height use_resize_set = scaling != 1 and _ == 0 # random number generator rand_val = np.random.uniform(-1, 1) if config.random_resizer == 0 else config.random_resizer abs_rand = np.abs(rand_val) # random gamma-to-linear if _ == 0: clip = src_linear if rand_val < 0 else src resizes = linear_resizes if rand_val < 0 else resizes # random resizers if abs_rand < (0.05 if downscale else 0.05): clip = resizes['bilinear'] if use_resize_set else clip.resize.Bilinear(dw, dh) elif abs_rand < (0.10 if downscale else 0.10): clip = resizes['spline16'] if use_resize_set else clip.resize.Spline16(dw, dh) elif abs_rand < (0.15 if downscale else 0.15): clip = resizes['spline36'] if use_resize_set else clip.resize.Spline36(dw, dh) elif abs_rand < (0.25 if downscale else 0.40): # Lanczos taps=[2, 12) taps = int(np.clip(np.random.exponential(2) + 2, 2, 11)) clip = resizes['lanczos{}'.format(taps)] if use_resize_set else clip.resize.Lanczos(dw, dh, filter_param_a=taps) elif abs_rand < (0.50 if downscale else 0.50): # Catmull-Rom b = 0 if config.random_resizer == 0.4 else np.random.normal(0, 1/6) c = (1 - b) * 0.5 clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) elif abs_rand < (0.60 if downscale else 0.60): # Mitchell-Netravali (standard Bicubic) b = 1/3 if config.random_resizer == 0.6 else np.random.normal(1/3, 1/6) c = (1 - b) * 0.5 clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) elif abs_rand < (0.80 if downscale else 0.70): # sharp Bicubic b = -0.5 if config.random_resizer == 0.7 else np.random.normal(-0.5, 0.25) c = b * -0.5 clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) elif abs_rand < (0.85 if downscale else 0.80): # soft Bicubic b = 0.75 if config.random_resizer == 0.8 else np.random.normal(0.75, 0.25) c = 1 - b clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) elif abs_rand < (1.00 if downscale else 0.90): # arbitrary Bicubic b = np.random.normal(0, 0.5) c = np.random.normal(0.25, 0.25) clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) elif abs_rand < (1.00 if downscale else 1.00): # Bicubic with haloing & aliasing b = np.random.normal(0, 1) # amount of haloing c = -1 # when c is around b * 0.8, aliasing is minimum if b >= 0: # with aliasing b = 1 + b while c < 0 or c > b * 1.2: c = np.random.normal(b * 0.4, b * 0.2) else: # without aliasing b = 1 - b while c < 0 or c > b * 
1.2: c = np.random.normal(b * 0.8, b * 0.2) b = -b clip = clip.resize.Bicubic(dw, dh, filter_param_a=b, filter_param_b=c) # return return clip _resizes = [resize_set_func(s, convert_linear=False) for s in _srcs] _linear_resizes = [resize_set_func(s, convert_linear=config.multistage_resize == 0) for s in _srcs_linear] _dst = _dst_blk.std.FrameEval(lambda n: resize_eval(n, _srcs, _srcs_linear, _resizes, _linear_resizes, dscale=True)) _dst = _dst.resize.Bicubic(transfer_s='709') # convert to BT.709 transfer # chroma subsampling def chroma_subsampling(src): YUV420PS = core.register_format(vs.YUV, vs.FLOAT, 32, 1, 1) src420 = src.resize.Bicubic(format=YUV420PS, matrix_s='709', filter_param_a=0, filter_param_b=0.5) clips = [src420.resize.Bilinear(format=vs.RGBS, matrix_in_s='709'), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=1.0, filter_param_b=0.0), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=0.5, filter_param_b=0.5), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=1 / 3, filter_param_b=1 / 3), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=0, filter_param_b=0.5), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-0.5, filter_param_b=0.25), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-1, filter_param_b=0.3), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-1, filter_param_b=0.8), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-2, filter_param_b=0.6), src420.resize.Bicubic(format=vs.RGBS, matrix_in_s='709', filter_param_a=-2, filter_param_b=1.6)] clips += [src] * 6 clip = src.std.FrameEval(lambda n: eval_random_select(n, clips)) return clip _dst = chroma_subsampling(_dst) # parser def parse2_pyfunc(label): channel_index = -3 if data_format == 'NCHW' else -1 dscale = label.shape[-2 if data_format == 'NCHW' else -3] // patch_height # safely acquire and increase shared index _lock.acquire() index = _index_ref[0] _index_ref[0] = (index + 1) % epoch_size _lock.release() # processing using vs _src_ref[index] = label if config.pre_down and dscale > 1: f_src = _src.get_frame(index) f_dst = _dst.get_frame(index) _src_ref[index] = None # vs.VideoFrame to np.ndarray if config.pre_down and dscale > 1: label = [] planes = f_src.format.num_planes for p in range(planes): f_arr = np.array(f_src.get_read_array(p), copy=False) label.append(f_arr) label = np.stack(label, axis=channel_index) data = [] planes = f_dst.format.num_planes for p in range(planes): f_arr = np.array(f_dst.get_read_array(p), copy=False) data.append(f_arr) data = np.stack(data, axis=channel_index) # add Gaussian noise of random scale and random spatial correlation def _add_noise(data, noise_scale, noise_corr): # noise spatial correlation def noise_correlation(noise, corr): if corr > 0: sigma = np.random.normal(corr, corr) if sigma > 0.25: sigma = [0, sigma, sigma] if data_format == 'NCHW' else [sigma, sigma, 0] noise = ndimage.gaussian_filter(noise, sigma, truncate=2.0) return noise if noise_scale <= 0: return data rand_val = np.random.uniform(0, 1) scale = np.random.exponential(noise_scale) if rand_val < 0.2 or scale < 0.002: # won't add noise return data noise_shape = list(data.shape) if rand_val < 0.35: # RGB noise noise = np.random.normal(0.0, scale, noise_shape).astype(np.float32) noise = noise_correlation(noise, noise_corr) else: # Y/YUV noise noise_shape[channel_index] = 1 noise_y = np.random.normal(0.0, scale, 
noise_shape).astype(np.float32) noise_y = noise_correlation(noise_y, noise_corr) scale_uv = np.random.exponential(noise_scale / 2) if rand_val < 0.55 and scale_uv > 0.002: # YUV noise noise_u = np.random.normal(0.0, scale_uv, noise_shape).astype(np.float32) noise_u = noise_correlation(noise_u, noise_corr * 1.5) noise_v = np.random.normal(0.0, scale_uv, noise_shape).astype(np.float32) noise_v = noise_correlation(noise_v, noise_corr * 1.5) rand_val2 = np.random.uniform(0, 1) if rand_val2 < 0.3: # Rec.601 Kr = 0.299 Kg = 0.587 Kb = 0.114 elif rand_val2 < 0.9: # Rec.709 Kr = 0.2126 Kg = 0.7152 Kb = 0.0722 else: # Rec.2020 Kr = 0.2627 Kg = 0.6780 Kb = 0.0593 noise_r = noise_y + ((1 - Kr) / 2) * noise_v noise_b = noise_y + ((1 - Kb) / 2) * noise_u noise_g = (1 / Kg) * noise_y - (Kr / Kg) * noise_r - (Kb / Kg) * noise_b noise = [noise_r, noise_g, noise_b] else: noise = [noise_y, noise_y, noise_y] noise = np.concatenate(noise, axis=channel_index) # adding noise return data + noise data = _add_noise(data, config.noise_scale, config.noise_corr) # return return data, label def parse3_func(data, label): # final process data = tf.clip_by_value(data, 0.0, 1.0) label = tf.clip_by_value(label, 0.0, 1.0) # JPEG encoding def _jpeg_coding(data, quality_step, random_seed=None): if quality_step <= 0: return data steps = 16 prob_step = 0.02 rand_val = tf.random_uniform([], -1, 1, seed=random_seed) abs_rand = tf.abs(rand_val) def c_f1(data): if data_format == 'NCHW': data = tf.transpose(data, (1, 2, 0)) data = tf.image.convert_image_dtype(data, tf.uint8, saturate=True) def _f1(quality, chroma_ds): quality = int(quality + 0.5) return tf.image.encode_jpeg(data, quality=quality, chroma_downsampling=chroma_ds) def _cond_recur(abs_rand, count=15, chroma_ds=False, prob=0.0, quality=100.0): prob += prob_step if count <= 0: return _f1(quality, chroma_ds) else: return tf.cond(abs_rand < prob, lambda: _f1(quality, chroma_ds), lambda: _cond_recur(abs_rand, count - 1, chroma_ds, prob, quality - config.jpeg_coding)) data = tf.cond(rand_val < 0, lambda: _cond_recur(abs_rand, steps - 1, True), lambda: _cond_recur(abs_rand, steps - 1, False)) data = tf.image.decode_jpeg(data) data = tf.image.convert_image_dtype(data, tf.float32, saturate=False) if data_format == 'NCHW': data = tf.transpose(data, (2, 0, 1)) return data return tf.cond(rand_val < prob_step * steps, lambda: c_f1(data), lambda: data) data = _jpeg_coding(data, config.jpeg_coding, config.random_seed if is_testing else None) # return return data, label # Dataset API dataset = tf.data.Dataset.from_tensor_slices((files)) if is_training and buffer_size > 0: dataset = dataset.shuffle(buffer_size) dataset = dataset.map(parse1_func, num_parallel_calls=1 if is_testing else threads) dataset = dataset.map(lambda label: tuple(tf.py_func(parse2_pyfunc, [label], [tf.float32, tf.float32])), num_parallel_calls=1 if is_testing else threads_py) dataset = dataset.map(parse3_func, num_parallel_calls=1 if is_testing else threads) dataset = dataset.batch(batch_size) dataset = dataset.repeat(num_epochs if is_training else None) dataset = dataset.prefetch(64) # return iterator iterator = dataset.make_one_shot_iterator() next_data, next_label = iterator.get_next() # data shape declaration data_shape = [None] * 4 data_shape[-3 if data_format == 'NCHW' else -1] = channels next_data.set_shape(data_shape) next_label.set_shape(data_shape) return next_data, next_label
def add_eval_dict(self, eval_dict): """Observes an evaluation result dict for a single example. When executing eagerly, once all observations have been observed by this method you can use `.evaluate()` to get the final metrics. When using `tf.estimator.Estimator` for evaluation this function is used by `get_estimator_eval_metric_ops()` to construct the metric update op. Args: eval_dict: A dictionary that holds tensors for evaluating an object detection model, returned from eval_util.result_dict_for_single_example(). Returns: None when executing eagerly, or an update_op that can be used to update the eval metrics in `tf.estimator.EstimatorSpec`. """ def update_op(image_id_batched, groundtruth_boxes_batched, groundtruth_classes_batched, groundtruth_instance_masks_batched, groundtruth_verified_neg_classes_batched, groundtruth_not_exhaustive_classes_batched, num_gt_boxes_per_image, detection_scores_batched, detection_classes_batched, detection_masks_batched, num_det_boxes_per_image, original_image_spatial_shape): """Update op for metrics.""" for (image_id, groundtruth_boxes, groundtruth_classes, groundtruth_instance_masks, groundtruth_verified_neg_classes, groundtruth_not_exhaustive_classes, num_gt_box, detection_scores, detection_classes, detection_masks, num_det_box, original_image_shape) in zip( image_id_batched, groundtruth_boxes_batched, groundtruth_classes_batched, groundtruth_instance_masks_batched, groundtruth_verified_neg_classes_batched, groundtruth_not_exhaustive_classes_batched, num_gt_boxes_per_image, detection_scores_batched, detection_classes_batched, detection_masks_batched, num_det_boxes_per_image, original_image_spatial_shape): self.add_single_ground_truth_image_info( image_id, { input_data_fields.groundtruth_boxes: groundtruth_boxes[:num_gt_box], input_data_fields.groundtruth_classes: groundtruth_classes[:num_gt_box], input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks[:num_gt_box] [:original_image_shape[0], :original_image_shape[1]], input_data_fields.groundtruth_verified_neg_classes: groundtruth_verified_neg_classes, input_data_fields.groundtruth_not_exhaustive_classes: groundtruth_not_exhaustive_classes }) self.add_single_detected_image_info( image_id, { 'detection_scores': detection_scores[:num_det_box], 'detection_classes': detection_classes[:num_det_box], 'detection_masks': detection_masks[:num_det_box] [:original_image_shape[0], :original_image_shape[1]] }) # Unpack items from the evaluation dictionary. 
input_data_fields = fields.InputDataFields detection_fields = fields.DetectionResultFields image_id = eval_dict[input_data_fields.key] original_image_spatial_shape = eval_dict[ input_data_fields.original_image_spatial_shape] groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] groundtruth_instance_masks = eval_dict[ input_data_fields.groundtruth_instance_masks] groundtruth_verified_neg_classes = eval_dict[ input_data_fields.groundtruth_verified_neg_classes] groundtruth_not_exhaustive_classes = eval_dict[ input_data_fields.groundtruth_not_exhaustive_classes] num_gt_boxes_per_image = eval_dict.get( input_data_fields.num_groundtruth_boxes, None) detection_scores = eval_dict[detection_fields.detection_scores] detection_classes = eval_dict[detection_fields.detection_classes] detection_masks = eval_dict[detection_fields.detection_masks] num_det_boxes_per_image = eval_dict.get( detection_fields.num_detections, None) if not image_id.shape.as_list(): # Apply a batch dimension to all tensors. image_id = tf.expand_dims(image_id, 0) groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) groundtruth_instance_masks = tf.expand_dims( groundtruth_instance_masks, 0) groundtruth_verified_neg_classes = tf.expand_dims( groundtruth_verified_neg_classes, 0) groundtruth_not_exhaustive_classes = tf.expand_dims( groundtruth_not_exhaustive_classes, 0) detection_scores = tf.expand_dims(detection_scores, 0) detection_classes = tf.expand_dims(detection_classes, 0) detection_masks = tf.expand_dims(detection_masks, 0) if num_gt_boxes_per_image is None: num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] else: num_gt_boxes_per_image = tf.expand_dims( num_gt_boxes_per_image, 0) if num_det_boxes_per_image is None: num_det_boxes_per_image = tf.shape(detection_scores)[1:2] else: num_det_boxes_per_image = tf.expand_dims( num_det_boxes_per_image, 0) else: if num_gt_boxes_per_image is None: num_gt_boxes_per_image = tf.tile( tf.shape(groundtruth_boxes)[1:2], multiples=tf.shape(groundtruth_boxes)[0:1]) if num_det_boxes_per_image is None: num_det_boxes_per_image = tf.tile( tf.shape(detection_scores)[1:2], multiples=tf.shape(detection_scores)[0:1]) return tf.py_func(update_op, [ image_id, groundtruth_boxes, groundtruth_classes, groundtruth_instance_masks, groundtruth_verified_neg_classes, groundtruth_not_exhaustive_classes, num_gt_boxes_per_image, detection_scores, detection_classes, detection_masks, num_det_boxes_per_image, original_image_spatial_shape ], [])
def generate_detections(self, cls_outputs, box_outputs, indices, classes,
                        image_id, image_scale):
  return tf.py_func(_generate_detections, [
      cls_outputs, box_outputs, self._anchors.boxes, indices, classes,
      image_id, image_scale, self._num_classes
  ], tf.float32)
def model_fn(features, labels, mode, params, config):
  """Build the model function for use in an estimator.

  Args:
    features: The input features for the estimator.
    labels: The labels, unused here.
    mode: Signifies whether it is train or test or predict.
    params: Some hyperparameters as a dictionary.
    config: The RunConfig, unused here.

  Returns:
    EstimatorSpec: A tf.estimator.EstimatorSpec instance.
  """
  del labels, config

  encoder = make_encoder(params["activation"], params["num_topics"],
                         params["layer_sizes"])
  decoder, topics_words = make_decoder(params["num_topics"],
                                       features.shape[1])
  topics_prior = make_prior(params["num_topics"],
                            params["prior_initial_value"])

  alpha = topics_prior.concentration

  topics_posterior = encoder(features)
  topics = topics_posterior.sample(seed=234)
  random_reconstruction = decoder(topics)

  reconstruction = random_reconstruction.log_prob(features)
  tf1.summary.scalar("reconstruction", tf.reduce_mean(reconstruction))

  # Compute the KL-divergence between two Dirichlets analytically.
  # The sampled KL does not work well for "sparse" distributions
  # (see Appendix D of [2]).
  kl = tfd.kl_divergence(topics_posterior, topics_prior)
  tf1.summary.scalar("kl", tf.reduce_mean(kl))

  # Ensure that the KL is non-negative (up to a very small slack).
  # Negative KL can happen due to numerical instability.
  with tf.control_dependencies(
      [tf.debugging.assert_greater(kl, -1e-3, message="kl")]):
    kl = tf.identity(kl)

  elbo = reconstruction - kl
  avg_elbo = tf.reduce_mean(elbo)
  tf1.summary.scalar("elbo", avg_elbo)
  loss = -avg_elbo

  # Perform variational inference by minimizing the -ELBO.
  global_step = tf1.train.get_or_create_global_step()
  optimizer = tf1.train.AdamOptimizer(params["learning_rate"])

  # This implements the "burn-in" for prior parameters (see Appendix D of [2]).
  # For the first prior_burn_in_steps steps they are fixed, and then trained
  # jointly with the other parameters.
  grads_and_vars = optimizer.compute_gradients(loss)
  grads_and_vars_except_prior = [
      x for x in grads_and_vars if x[1] not in topics_prior.variables]

  def train_op_except_prior():
    return optimizer.apply_gradients(
        grads_and_vars_except_prior, global_step=global_step)

  def train_op_all():
    return optimizer.apply_gradients(grads_and_vars, global_step=global_step)

  train_op = tf.cond(
      pred=global_step < params["prior_burn_in_steps"],
      true_fn=train_op_except_prior,
      false_fn=train_op_all)

  # The perplexity is an exponent of the average negative ELBO per word.
  words_per_document = tf.reduce_sum(features, axis=1)
  log_perplexity = -elbo / words_per_document
  tf1.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity)))
  (log_perplexity_tensor,
   log_perplexity_update) = tf1.metrics.mean(log_perplexity)
  perplexity_tensor = tf.exp(log_perplexity_tensor)

  # Obtain the topics summary. Implemented as a py_func for simplicity.
  topics = tf1.py_func(
      functools.partial(get_topics_strings, vocabulary=params["vocabulary"]),
      [topics_words, alpha],
      tf.string,
      stateful=False)
  tf1.summary.text("topics", topics)

  return tf1.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={
          "elbo": tf1.metrics.mean(elbo),
          "reconstruction": tf1.metrics.mean(reconstruction),
          "kl": tf1.metrics.mean(kl),
          "perplexity": (perplexity_tensor, log_perplexity_update),
          "topics": (topics, tf.no_op()),
      },
  )
def build(): """Builds the Tensorflow graph.""" inputs, labels, lengths = None, None, None if mode in ('train', 'eval'): if isinstance(no_event_label, numbers.Number): label_shape = [] else: label_shape = [len(no_event_label)] inputs, labels, lengths = magenta.common.get_padded_batch( sequence_example_file_paths, hparams.batch_size, input_size, label_shape=label_shape, shuffle=mode == 'train') elif mode == 'generate': inputs = tf.placeholder(tf.float32, [hparams.batch_size, None, input_size]) if isinstance(encoder_decoder, magenta.music.OneHotIndexEventSequenceEncoderDecoder): expanded_inputs = tf.one_hot( tf.cast(tf.squeeze(inputs, axis=-1), tf.int64), encoder_decoder.input_depth) else: expanded_inputs = inputs dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob if hparams.use_cudnn: outputs, initial_state, final_state = make_cudnn( expanded_inputs, hparams.rnn_layer_sizes, hparams.batch_size, mode, dropout_keep_prob=dropout_keep_prob, residual_connections=hparams.residual_connections) else: cell = make_rnn_cell( hparams.rnn_layer_sizes, dropout_keep_prob=dropout_keep_prob, attn_length=hparams.attn_length, residual_connections=hparams.residual_connections) initial_state = cell.zero_state(hparams.batch_size, tf.float32) outputs, final_state = tf.nn.dynamic_rnn( cell, inputs, sequence_length=lengths, initial_state=initial_state, swap_memory=True) outputs_flat = magenta.common.flatten_maybe_padded_sequences( outputs, lengths) if isinstance(num_classes, numbers.Number): num_logits = num_classes else: num_logits = sum(num_classes) logits_flat = contrib_layers.linear(outputs_flat, num_logits) if mode in ('train', 'eval'): labels_flat = magenta.common.flatten_maybe_padded_sequences( labels, lengths) if isinstance(num_classes, numbers.Number): softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels_flat, logits=logits_flat) predictions_flat = tf.argmax(logits_flat, axis=1) else: logits_offsets = np.cumsum([0] + num_classes) softmax_cross_entropy = [] predictions = [] for i in range(len(num_classes)): softmax_cross_entropy.append( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels_flat[:, i], logits=logits_flat[ :, logits_offsets[i]:logits_offsets[i + 1]])) predictions.append( tf.argmax(logits_flat[ :, logits_offsets[i]:logits_offsets[i + 1]], axis=1)) predictions_flat = tf.stack(predictions, 1) correct_predictions = tf.to_float( tf.equal(labels_flat, predictions_flat)) event_positions = tf.to_float(tf.not_equal(labels_flat, no_event_label)) no_event_positions = tf.to_float(tf.equal(labels_flat, no_event_label)) # Compute the total number of time steps across all sequences in the # batch. For some models this will be different from the number of RNN # steps. 
def batch_labels_to_num_steps(batch_labels, lengths): num_steps = 0 for labels, length in zip(batch_labels, lengths): num_steps += encoder_decoder.labels_to_num_steps(labels[:length]) return np.float32(num_steps) num_steps = tf.py_func( batch_labels_to_num_steps, [labels, lengths], tf.float32) if mode == 'train': loss = tf.reduce_mean(softmax_cross_entropy) perplexity = tf.exp(loss) accuracy = tf.reduce_mean(correct_predictions) event_accuracy = ( tf.reduce_sum(correct_predictions * event_positions) / tf.reduce_sum(event_positions)) no_event_accuracy = ( tf.reduce_sum(correct_predictions * no_event_positions) / tf.reduce_sum(no_event_positions)) loss_per_step = tf.reduce_sum(softmax_cross_entropy) / num_steps perplexity_per_step = tf.exp(loss_per_step) optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) train_op = contrib_slim.learning.create_train_op( loss, optimizer, clip_gradient_norm=hparams.clip_norm) tf.add_to_collection('train_op', train_op) vars_to_summarize = { 'loss': loss, 'metrics/perplexity': perplexity, 'metrics/accuracy': accuracy, 'metrics/event_accuracy': event_accuracy, 'metrics/no_event_accuracy': no_event_accuracy, 'metrics/loss_per_step': loss_per_step, 'metrics/perplexity_per_step': perplexity_per_step, } elif mode == 'eval': vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map({ 'loss': tf.metrics.mean(softmax_cross_entropy), 'metrics/accuracy': tf.metrics.accuracy(labels_flat, predictions_flat), 'metrics/per_class_accuracy': tf.metrics.mean_per_class_accuracy(labels_flat, predictions_flat, num_classes), 'metrics/event_accuracy': tf.metrics.recall(event_positions, correct_predictions), 'metrics/no_event_accuracy': tf.metrics.recall(no_event_positions, correct_predictions), 'metrics/loss_per_step': tf.metrics.mean( tf.reduce_sum(softmax_cross_entropy) / num_steps, weights=num_steps), }) for updates_op in update_ops.values(): tf.add_to_collection('eval_ops', updates_op) # Perplexity is just exp(loss) and doesn't need its own update op. vars_to_summarize['metrics/perplexity'] = tf.exp( vars_to_summarize['loss']) vars_to_summarize['metrics/perplexity_per_step'] = tf.exp( vars_to_summarize['metrics/loss_per_step']) for var_name, var_value in six.iteritems(vars_to_summarize): tf.summary.scalar(var_name, var_value) tf.add_to_collection(var_name, var_value) elif mode == 'generate': temperature = tf.placeholder(tf.float32, []) if isinstance(num_classes, numbers.Number): softmax_flat = tf.nn.softmax( tf.div(logits_flat, tf.fill([num_classes], temperature))) softmax = tf.reshape( softmax_flat, [hparams.batch_size, -1, num_classes]) else: logits_offsets = np.cumsum([0] + num_classes) softmax = [] for i in range(len(num_classes)): sm = tf.nn.softmax( tf.div( logits_flat[:, logits_offsets[i]:logits_offsets[i + 1]], tf.fill([num_classes[i]], temperature))) sm = tf.reshape(sm, [hparams.batch_size, -1, num_classes[i]]) softmax.append(sm) tf.add_to_collection('inputs', inputs) tf.add_to_collection('temperature', temperature) tf.add_to_collection('softmax', softmax) # Flatten state tuples for metagraph compatibility. for state in tf_nest.flatten(initial_state): tf.add_to_collection('initial_state', state) for state in tf_nest.flatten(final_state): tf.add_to_collection('final_state', state)
def load_noteseqs(fp,
                  batch_size,
                  seq_len,
                  max_discrete_times=None,
                  max_discrete_velocities=None,
                  augment_stretch_bounds=None,
                  augment_transpose_bounds=None,
                  randomize_chord_order=False,
                  repeat=False,
                  buffer_size=512):
  """Loads random subsequences from NoteSequences in TFRecords.

  Args:
    fp: List of shard fps.
    batch_size: Number of sequences in batch.
    seq_len: Length of subsequences.
    max_discrete_times: Maximum number of time buckets at 31.25Hz.
    max_discrete_velocities: Maximum number of velocity buckets.
    augment_stretch_bounds: Tuple containing speed ratio range.
    augment_transpose_bounds: Tuple containing semitone augmentation range.
    randomize_chord_order: If True, list notes of chord in random order.
    repeat: If True, continuously loop through records.
    buffer_size: Size of random queue.

  Returns:
    A dict containing the loaded tensor subsequences.

  Raises:
    ValueError: Invalid file format for shard filepaths.
  """

  # Deserializes NoteSequences and extracts numeric tensors
  def _str_to_tensor(note_sequence_str,
                     augment_stretch_bounds=None,
                     augment_transpose_bounds=None):
    """Converts a NoteSequence serialized proto to arrays."""
    note_sequence = music_pb2.NoteSequence.FromString(note_sequence_str)

    note_sequence_ordered = list(note_sequence.notes)

    if randomize_chord_order:
      random.shuffle(note_sequence_ordered)
      note_sequence_ordered = sorted(
          note_sequence_ordered, key=lambda n: n.start_time)
    else:
      note_sequence_ordered = sorted(
          note_sequence_ordered, key=lambda n: (n.start_time, n.pitch))

    # Transposition data augmentation
    if augment_transpose_bounds is not None:
      transpose_factor = np.random.randint(*augment_transpose_bounds)

      for note in note_sequence_ordered:
        note.pitch += transpose_factor

    note_sequence_ordered = [
        n for n in note_sequence_ordered
        if (n.pitch >= 21) and (n.pitch <= 108)
    ]

    pitches = np.array([note.pitch for note in note_sequence_ordered])
    velocities = np.array(
        [note.velocity for note in note_sequence_ordered])
    start_times = np.array(
        [note.start_time for note in note_sequence_ordered])
    end_times = np.array([note.end_time for note in note_sequence_ordered])

    # Tempo data augmentation
    if augment_stretch_bounds is not None:
      stretch_factor = np.random.uniform(*augment_stretch_bounds)
      start_times *= stretch_factor
      end_times *= stretch_factor

    if note_sequence_ordered:
      # Delta time start high to indicate free decision
      delta_times = np.concatenate([[100000.],
                                    start_times[1:] - start_times[:-1]])
    else:
      delta_times = np.zeros_like(start_times)

    return note_sequence_str, np.stack(
        [pitches, velocities, delta_times, start_times, end_times],
        axis=1).astype(np.float32)

  # Filter out excessively short examples
  def _filter_short(note_sequence_tensor, seq_len):
    note_sequence_len = tf.shape(note_sequence_tensor)[0]
    return tf.greater_equal(note_sequence_len, seq_len)

  # Take a random crop of a note sequence
  def _random_crop(note_sequence_tensor, seq_len):
    note_sequence_len = tf.shape(note_sequence_tensor)[0]
    start_max = note_sequence_len - seq_len
    start_max = tf.maximum(start_max, 0)
    start = tf.random_uniform([], maxval=start_max + 1, dtype=tf.int32)
    seq = note_sequence_tensor[start:start + seq_len]
    return seq

  # Find sharded filenames
  filenames = tf.gfile.Glob(fp)

  # Create dataset
  dataset = tf.data.TFRecordDataset(filenames)

  # Deserialize protos
  # pylint: disable=g-long-lambda
  dataset = dataset.map(
      lambda data: tf.py_func(
          lambda x: _str_to_tensor(x, augment_stretch_bounds,
                                   augment_transpose_bounds),
          [data], (tf.string, tf.float32), stateful=False))
  # pylint: enable=g-long-lambda

  # Filter sequences that are too short
  dataset = dataset.filter(lambda s, n: _filter_short(n, seq_len))

  # Get random crops
  dataset = dataset.map(lambda s, n: (s, _random_crop(n, seq_len)))

  # Shuffle
  if repeat:
    dataset = dataset.shuffle(buffer_size=buffer_size)

  # Make batches
  dataset = dataset.batch(batch_size, drop_remainder=True)

  # Repeat
  if repeat:
    dataset = dataset.repeat()

  # Get tensors
  iterator = dataset.make_one_shot_iterator()
  note_sequence_strs, note_sequence_tensors = iterator.get_next()

  # Set shapes
  note_sequence_strs.set_shape([batch_size])
  note_sequence_tensors.set_shape([batch_size, seq_len, 5])

  # Retrieve tensors
  note_pitches = tf.cast(note_sequence_tensors[:, :, 0] + 1e-4, tf.int32)
  note_velocities = tf.cast(note_sequence_tensors[:, :, 1] + 1e-4, tf.int32)
  note_delta_times = note_sequence_tensors[:, :, 2]
  note_start_times = note_sequence_tensors[:, :, 3]
  note_end_times = note_sequence_tensors[:, :, 4]

  # Onsets and frames model samples at 31.25Hz
  note_delta_times_int = tf.cast(
      tf.round(note_delta_times * 31.25) + 1e-4, tf.int32)

  # Reduce time discretizations to a fixed number of buckets
  if max_discrete_times is not None:
    note_delta_times_int = tf.minimum(note_delta_times_int,
                                      max_discrete_times)

  # Quantize velocities
  if max_discrete_velocities is not None:
    note_velocities = tf.minimum(
        note_velocities / (128 // max_discrete_velocities),
        max_discrete_velocities)

  # Build return dict
  note_tensors = {
      "pb_strs": note_sequence_strs,
      "midi_pitches": note_pitches,
      "velocities": note_velocities,
      "delta_times": note_delta_times,
      "delta_times_int": note_delta_times_int,
      "start_times": note_start_times,
      "end_times": note_end_times
  }

  return note_tensors
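# Illustrative usage sketch, not part of the original code: consuming the
# dict returned by load_noteseqs in a TF 1.x session. The shard glob and the
# batch/sequence sizes below are hypothetical placeholders.
import tensorflow as tf

note_tensors = load_noteseqs(
    fp='/tmp/noteseqs/train-*.tfrecord',  # hypothetical shard pattern
    batch_size=32,
    seq_len=128,
    repeat=True)

with tf.Session() as sess:
  # Each run fetches one batch; feature tensors have shape
  # [batch_size, seq_len].
  batch = sess.run(note_tensors)
  print(batch['midi_pitches'].shape, batch['delta_times_int'].shape)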
def simulate(self, action):
  with tf.name_scope("environment/simulate"):
    actions = tf.concat([tf.expand_dims(action, axis=1)] * self._num_frames,
                        axis=1)
    history = self.history_buffer.get_all_elements()
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      # We only need 1 target frame here, set it.
      hparams_target_frames = self._model.hparams.video_num_target_frames
      self._model.hparams.video_num_target_frames = 1
      model_output = self._model.infer({
          "inputs": history,
          "input_action": actions,
          "reset_internal_states": self._reset_model.read_value()
      })
      self._model.hparams.video_num_target_frames = hparams_target_frames

    observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                     self.observ_dtype)

    reward = tf.to_float(model_output["target_reward"])
    reward = tf.reshape(reward, shape=(self.batch_size,)) + self._min_reward

    if self._intrinsic_reward_scale:
      # Use the model's uncertainty about its prediction as an intrinsic
      # reward. The uncertainty is measured by the log probability of the
      # predicted pixel value.
      if "targets_logits" not in model_output:
        raise ValueError("The use of intrinsic rewards requires access to "
                         "the logits. Ensure that model.infer returns "
                         "'targets_logits'")
      uncertainty_reward = compute_uncertainty_reward(
          model_output["targets_logits"], model_output["targets"])
      uncertainty_reward = tf.minimum(
          1., self._intrinsic_reward_scale * uncertainty_reward)
      uncertainty_reward = tf.Print(uncertainty_reward, [uncertainty_reward],
                                    message="uncertainty_reward", first_n=1,
                                    summarize=8)
      reward += uncertainty_reward

    done = tf.constant(False, tf.bool, shape=(self.batch_size,))

    with tf.control_dependencies([observ]):
      dump_frame_op = tf.cond(
          self._video_condition,
          lambda: tf.py_func(self._video_dump_frame,  # pylint: disable=g-long-lambda
                             [observ, reward], []),
          tf.no_op)
    with tf.control_dependencies([
        self._observ.assign(observ),
        self.history_buffer.move_by_one_element(observ), dump_frame_op
    ]):
      clear_reset_model_op = tf.assign(self._reset_model, tf.constant(0.0))
      with tf.control_dependencies([clear_reset_model_op]):
        return tf.identity(reward), tf.identity(done)
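# Minimal standalone sketch, not part of the original class: it mirrors the
# dump_frame_op construction above, where a Python side effect is run through
# tf.py_func only when a boolean condition tensor is true. All names below
# (should_record, _dump_frame, frame) are hypothetical stand-ins.
import tensorflow as tf

should_record = tf.placeholder(tf.bool, [])
frame = tf.random_uniform([2, 64, 64, 3], maxval=255, dtype=tf.int32)

def _dump_frame(frame_np):
  # Stand-in for self._video_dump_frame; only reports the frame shape.
  print('dumping frame with shape', frame_np.shape)

dump_frame_op = tf.cond(
    should_record,
    lambda: tf.py_func(_dump_frame, [frame], []),
    tf.no_op)

with tf.control_dependencies([dump_frame_op]):
  frame_out = tf.identity(frame)

with tf.Session() as sess:
  sess.run(frame_out, feed_dict={should_record: True})   # runs the dump
  sess.run(frame_out, feed_dict={should_record: False})  # skips the dump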
def calculate_metrics(frame_probs,
                      onset_probs,
                      frame_predictions,
                      onset_predictions,
                      offset_predictions,
                      velocity_values,
                      sequence_label,
                      frame_labels,
                      sequence_id,
                      hparams,
                      min_pitch,
                      max_pitch,
                      onsets_only=False,
                      pitch_map=None):
  """Calculate metrics for a single example."""

  def add_metrics(precision, recall, f1, prefix):
    """Create and return a dict of metrics."""
    metrics = {
        prefix + '_precision': precision,
        prefix + '_recall': recall,
        prefix + '_f1_score': f1,
    }
    return metrics

  def make_metrics(note_precision,
                   note_recall,
                   note_f1,
                   note_with_velocity_precision,
                   note_with_velocity_recall,
                   note_with_velocity_f1,
                   note_with_offsets_precision,
                   note_with_offsets_recall,
                   note_with_offsets_f1,
                   note_with_offsets_velocity_precision,
                   note_with_offsets_velocity_recall,
                   note_with_offsets_velocity_f1,
                   processed_frame_predictions,
                   frame_labels,
                   onsets_only=False,
                   prefix=''):
    """Create a dict of onset, offset, frame and velocity metrics."""
    metrics = add_metrics(note_precision, note_recall, note_f1,
                          '_'.join(x for x in [prefix, 'note'] if x))
    metrics.update(
        add_metrics(note_with_velocity_precision, note_with_velocity_recall,
                    note_with_velocity_f1,
                    '_'.join(x for x in [prefix, 'note_with_velocity'] if x)))
    if not onsets_only:
      metrics.update(
          add_metrics(note_with_offsets_precision, note_with_offsets_recall,
                      note_with_offsets_f1,
                      '_'.join(x for x in [prefix, 'note_with_offsets'] if x)))
      metrics.update(
          add_metrics(
              note_with_offsets_velocity_precision,
              note_with_offsets_velocity_recall,
              note_with_offsets_velocity_f1,
              '_'.join(
                  x for x in [prefix, 'note_with_offsets_velocity'] if x)))
    frame_metrics = calculate_frame_metrics(
        frame_labels=frame_labels,
        frame_predictions=processed_frame_predictions)
    metrics.update(
        add_metrics(frame_metrics['precision'], frame_metrics['recall'],
                    frame_metrics['f1_score'],
                    '_'.join(x for x in [prefix, 'frame'] if x)))
    metrics['frame_accuracy'] = frame_metrics['accuracy']
    metrics['frame_accuracy_without_true_negatives'] = frame_metrics[
        'accuracy_without_true_negatives']
    return metrics

  (note_precision, note_recall, note_f1, note_with_velocity_precision,
   note_with_velocity_recall, note_with_velocity_f1,
   note_with_offsets_precision, note_with_offsets_recall,
   note_with_offsets_f1, note_with_offsets_velocity_precision,
   note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
   processed_frame_predictions) = tf.py_func(
       functools.partial(
           _calculate_metrics_py,
           hparams=hparams,
           min_pitch=min_pitch,
           max_pitch=max_pitch,
           onsets_only=onsets_only),
       inp=[
           frame_probs, onset_probs, frame_predictions, onset_predictions,
           offset_predictions, velocity_values, sequence_label, frame_labels,
           sequence_id
       ],
       Tout=([tf.float64] * 12) + [tf.float32],
       stateful=False)
  metrics = make_metrics(
      note_precision,
      note_recall,
      note_f1,
      note_with_velocity_precision,
      note_with_velocity_recall,
      note_with_velocity_f1,
      note_with_offsets_precision,
      note_with_offsets_recall,
      note_with_offsets_f1,
      note_with_offsets_velocity_precision,
      note_with_offsets_velocity_recall,
      note_with_offsets_velocity_f1,
      processed_frame_predictions,
      frame_labels,
      onsets_only=onsets_only)

  if pitch_map:
    for pitch, name in pitch_map.items():
      (note_precision, note_recall, note_f1, note_with_velocity_precision,
       note_with_velocity_recall, note_with_velocity_f1,
       note_with_offsets_precision, note_with_offsets_recall,
       note_with_offsets_f1, note_with_offsets_velocity_precision,
       note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
       processed_frame_predictions) = tf.py_func(
           functools.partial(
               _calculate_metrics_py,
               hparams=hparams,
               min_pitch=min_pitch,
               max_pitch=max_pitch,
               onsets_only=onsets_only,
               restrict_to_pitch=pitch),
           inp=[
               frame_probs, onset_probs, frame_predictions, onset_predictions,
               offset_predictions, velocity_values, sequence_label,
               frame_labels, sequence_id + name
           ],
           Tout=([tf.float64] * 12) + [tf.float32],
           stateful=False)
      metrics.update(
          make_metrics(
              note_precision,
              note_recall,
              note_f1,
              note_with_velocity_precision,
              note_with_velocity_recall,
              note_with_velocity_f1,
              note_with_offsets_precision,
              note_with_offsets_recall,
              note_with_offsets_f1,
              note_with_offsets_velocity_precision,
              note_with_offsets_velocity_recall,
              note_with_offsets_velocity_f1,
              processed_frame_predictions,
              frame_labels,
              onsets_only=onsets_only,
              prefix='pitch/' + name))
  return metrics
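# Minimal standalone sketch, not part of the original code: it shows the
# pattern used above of binding keyword arguments with functools.partial and
# returning several NumPy values of mixed dtypes from one tf.py_func call.
# _toy_metrics_py is a hypothetical stand-in for _calculate_metrics_py.
import functools
import numpy as np
import tensorflow as tf

def _toy_metrics_py(predictions, labels, threshold):
  """Computes precision/recall/F1 in Python, outside the TF graph."""
  pred = predictions > threshold
  tp = np.sum(pred & labels)
  precision = tp / max(np.sum(pred), 1)
  recall = tp / max(np.sum(labels), 1)
  f1 = 2 * precision * recall / max(precision + recall, 1e-8)
  return (np.float64(precision), np.float64(recall), np.float64(f1),
          pred.astype(np.float32))

predictions = tf.random_uniform([100])
labels = tf.greater(tf.random_uniform([100]), 0.5)

precision, recall, f1, thresholded = tf.py_func(
    functools.partial(_toy_metrics_py, threshold=0.5),
    inp=[predictions, labels],
    Tout=[tf.float64, tf.float64, tf.float64, tf.float32],
    stateful=False)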