def tf_data_to_COCO(ds: tf.data.Dataset, class2idx: Mapping[str, int]) -> COCO:
    """Build an indexed ground-truth ``COCO`` object from a batched dataset.

    Args:
        ds: Batched dataset. Once unbatched, every element must unpack as
            ``(image, (labels, bbs))``.
        class2idx: Mapping from class name to the integer category id.

    Returns:
        A ``COCO`` instance whose ``dataset`` attribute holds the collected
        images, annotations and categories, with ``createIndex()`` already
        called.
    """
    gt_coco: dict = {'images': [], 'annotations': []}

    # One COCO category per known class; ids come straight from the mapping.
    gt_coco['categories'] = [
        {'supercategory': 'instance', 'id': idx, 'name': name}
        for name, idx in class2idx.items()
    ]

    # Image ids start at 1; annotation ids keep growing across images.
    next_annot_id = 1
    for image_id, (image, (labels, bbs)) in enumerate(ds.unbatch(), start=1):
        height, width = image.shape[:2]
        im_annot, annots = _COCO_gt_annot(
            image_id, next_annot_id, (height, width), labels, bbs)
        gt_coco['images'].append(im_annot)
        gt_coco['annotations'].extend(annots)
        next_annot_id += len(annots)

    coco = COCO()
    coco.dataset = gt_coco
    coco.createIndex()
    return coco
def call(self, dataset: tf.data.Dataset):
    r"""
    Perform the adversarial training.

    The dataset is re-batched to the global batch size (dropping the
    incomplete remainder). Its first batch is logged as ``real_x`` /
    ``real_y`` and reused as the fixed input for the per-epoch generator
    logging in ``LogEvalMode.TEST`` mode.

    Args:
        dataset (:py:class:`tf.data.Dataset`): The adversarial training dataset.

    """
    current_epoch = self._current_epoch()

    losses = [self._d_loss, self._g_loss, self._e_loss]
    self._update_global_batch_size(dataset, losses)

    rebatched = dataset.unbatch().batch(
        self._global_batch_size, drop_remainder=True
    )
    dataset = wrap(rebatched)

    # The first batch provides the reference samples used for logging.
    samples = next(iter(dataset.take(1)))
    generator_inputs = samples[1]

    with self._train_summary_writer.as_default():
        self._log("real_x", samples[0][0])
        self._log("real_y", samples[0][1])

        for epoch in tf.range(current_epoch, self._epochs):
            per_replica_batches = (
                self._distribute_strategy.experimental_distribute_dataset(dataset)
            )

            for example in per_replica_batches:
                (
                    d_loss,
                    g_loss,
                    e_loss,
                    fake,
                    generator_of_encoder,
                ) = self._train_step(example)
                self._global_step.assign_add(1)

                # Every 10 steps: print the losses and measure performance
                # on the batch that was just used for training.
                if tf.equal(tf.math.mod(self._global_step, 10), 0):
                    tf.print(
                        f"[{self._global_step.numpy()}] g_loss: {g_loss} - "
                        f"d_loss: {d_loss} - e_loss: {e_loss}"
                    )
                    step_dataset = tf.data.Dataset.from_tensor_slices(example)
                    self._measure_performance(
                        step_dataset.batch(self._global_batch_size)
                    )

            self._epoch_completed(epoch + 1)

            if self._log_eval_mode == LogEvalMode.TEST:
                # Evaluate the models in inference mode on the fixed samples.
                generated = self._generator(generator_inputs, training=False)
                self._log("generator", generated)
                encoded = self._encoder(samples[0][0], training=False)
                self._log(
                    "generator_of_encoder",
                    self._generator(encoded, training=False),
                )
            elif self._log_eval_mode == LogEvalMode.TRAIN:
                # Log the outputs of the last training step of the epoch.
                self._log("generator", fake)
                self._log("generator_of_encoder", generator_of_encoder)
def processing(dataset: tf.data.Dataset, window_size, batch_size):
    """Turn a batched dataset into shuffled ``(window, target)`` batches.

    Each element is passed through ``table.lookup`` (``table`` is expected to
    exist at module level), the result is unbatched, and sliding windows of
    ``window_size + 1`` consecutive items are built with a shift of one.
    The first ``window_size`` items of a window are the input; the last item
    minus one is the target.

    Args:
        dataset: Batched source dataset.
        window_size: Number of input items per example.
        batch_size: Number of examples per output batch.

    Returns:
        The shuffled (buffer of 10000), batched and prefetched dataset.
    """
    span = window_size + 1

    items = dataset.map(lambda x: table.lookup(x)).unbatch()
    windows = items.window(span, shift=1, drop_remainder=True)
    # `window` yields nested datasets; flatten each one into a single tensor.
    sequences = windows.flat_map(lambda ds: ds.batch(span))
    pairs = sequences.map(lambda x: (x[:-1], x[-1] - 1))

    return pairs.shuffle(10000).batch(batch_size).prefetch(1)
def augment_random_warp(ds: tf.data.Dataset, process_in_batch=10, **options) -> tf.data.Dataset:
    '''Apply augmentation based on image warping.

    When ``process_in_batch`` is set, the dataset is batched before the warp
    is applied and unbatched again afterwards, so the returned dataset yields
    individual elements either way.

    Args:
        ds: dataset of images to augment
        process_in_batch: the number of images to apply warping in a batch
                          None to disable this feature
        options: options to be passed to random_warp function

    Returns:
        the augmented dataset
    '''
    batched = process_in_batch is not None

    def _warp(image):
        return random_warp(image, process_in_batch=process_in_batch, **options)

    if batched:
        ds = ds.batch(process_in_batch)

    ds = ds.map(_warp, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    return ds.unbatch() if batched else ds
def size_of_dataset(dataset: tf.data.Dataset) -> int:
    """Count the individual examples contained in a batched dataset.

    The dataset is unbatched and iterated once. Elements are counted as they
    stream by, without re-batching them and without keeping any of them.

    Args:
        dataset: A finite, batched dataset (``unbatch`` needs a leading
            batch dimension on every component).

    Returns:
        The number of examples after unbatching.
    """
    # Re-batching with `.batch(1)` only added one extra op per element;
    # counting the unbatched elements directly gives the same result.
    return sum(1 for _ in dataset.unbatch())
def _pack_with_tf_ops(dataset: tf.data.Dataset, keys: List[str], key2length: Dict[str, int]) -> tf.data.Dataset:
    """Helper-function for packing a dataset which has already been batched.

    Helper for pack_dataset()  Uses tf.while_loop.

    Every input batch is mapped to a variable number of packed examples, and
    the result is unbatched so the returned dataset yields one packed example
    per element. For each key ``k`` a packed example holds ``k``,
    ``k + '_position'`` and ``k + '_segmentation'``.

    Args:
      dataset: a dataset containing padded batches of examples.
      keys: a list of strings
      key2length: an dict from feature-key to integer

    Returns:
      a dataset.
    """
    # Template for an empty partially-packed example: zero-length int32
    # tensors for every key and its companion '<key>_position'.
    empty_example = {}
    for k in keys:
        empty_example[k] = tf.zeros([0], dtype=tf.int32)
        empty_example[k + '_position'] = tf.zeros([0], dtype=tf.int32)
    keys_etc = empty_example.keys()

    def write_packed_example(partial, outputs):
        """Append `partial` to `outputs` and return a fresh empty partial.

        Each tensor of `partial` is right-padded up to key2length[k] and
        written at the current end of the matching TensorArray.
        """
        new_partial = empty_example.copy()
        new_outputs = {}
        for k in keys_etc:
            new_outputs[k] = outputs[k].write(
                outputs[k].size(),
                tf.pad(partial[k], [[0, key2length[k] - tf.size(partial[k])]]))
        return new_partial, new_outputs

    def map_fn(x):
        """Internal function to map over the batched dataset.

        Consumes a batch of input examples and produces a variable number of
        output examples.

        Args:
          x: a batch of examples (dictionary of Tensor, indexed by `keys`)

        Returns:
          a dictionary of stacked Tensor, one row per packed example
        """
        partial = empty_example.copy()
        i = tf.zeros([], dtype=tf.int32)
        # The number of examples in this batch is read from the first key.
        dynamic_batch_size = tf.shape(x[keys[0]])[0]
        outputs = {}
        for k in keys:
            outputs[k] = tf.TensorArray(tf.int32, size=0, dynamic_size=True, element_shape=[key2length[k]])
            outputs[k + '_position'] = tf.TensorArray(
                tf.int32, size=0, dynamic_size=True, element_shape=[key2length[k]])

        def body_fn(i, partial, outputs):
            """Body function for while_loop.

            Args:
              i: integer scalar
              partial: dictionary of Tensor (partially-constructed example)
              outputs: dictionary of TensorArray

            Returns:
              A triple containing the new values of the inputs.
            """
            can_append = True
            one_example = {}
            for k in keys:
                val = tf.cast(x[k][i], tf.int32)
                # Keep the first n entries, n being the number of non-zero
                # values. NOTE(review): this assumes zeros only occur as
                # trailing padding - confirm against the caller.
                val = val[:tf.reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
                one_example[k] = val
            # The example can join the current partial only if it fits for
            # every key.
            for k in keys:
                can_append = tf.logical_and(
                    can_append,
                    tf.less_equal(
                        tf.size(partial[k]) + tf.size(one_example[k]), key2length[k]))

            def false_fn():
                # Does not fit: flush the partial and start an empty one.
                return write_packed_example(partial, outputs)

            def true_fn():
                # Fits: keep accumulating into the current partial.
                return partial, outputs

            partial, outputs = tf.cond(can_append, true_fn, false_fn)
            new_partial = {}
            for k in keys:
                # Truncate to key2length[k]; positions restart at 0 for each
                # appended sequence.
                new_seq = one_example[k][:key2length[k]]
                new_seq_len = tf.size(new_seq)
                new_partial[k] = tf.concat([partial[k], new_seq], 0)
                new_partial[k + '_position'] = tf.concat(
                    [partial[k + '_position'], tf.range(new_seq_len)], 0)
            partial = new_partial
            return i + 1, partial, outputs

        # For loop over all examples in the batch.
        # The condition is always true; the iteration count is bounded by
        # maximum_iterations. The partial tensors change length between
        # iterations, hence the relaxed shape invariants.
        i, partial, outputs = tf.while_loop(
            cond=lambda *_: True, body=body_fn, loop_vars=(i, partial, outputs),
            shape_invariants=(
                tf.TensorShape([]),
                {k: tf.TensorShape([None]) for k in keys_etc},
                {k: tf.TensorShape(None) for k in keys_etc},
            ),
            maximum_iterations=dynamic_batch_size)
        # Flush whatever is still accumulated after the last example.
        _, outputs = write_packed_example(partial, outputs)
        packed = {k: outputs[k].stack() for k in keys_etc}
        for k in keys:
            # Running count of positions equal to 0 along axis 1, multiplied
            # by a non-zero-token mask so zero tokens get segment 0.
            packed[k + '_segmentation'] = (tf.cumsum(
                tf.cast(tf.equal(packed[k + '_position'], 0), tf.int32), axis=1) *
                tf.cast(tf.not_equal(packed[k], 0), tf.int32))
        return packed

    dataset = dataset.map(map_fn, num_parallel_calls=AUTOTUNE)
    # Each mapped element stacks several packed examples; split them apart.
    return dataset.unbatch()
def call(
    self,
    training_set: tf.data.Dataset,
    validation_set: tf.data.Dataset,
    log_freq: int = 10,
    measure_performance_freq: int = 10,
):
    """
    Start the training.

    Performance is measured on the validation set once before training and
    again at the end of every epoch.

    Args:
        training_set (:py:obj:`tf.data.Dataset`): Training dataset.
        validation_set (:py:obj:`tf.data.Dataset`): Validation dataset.
        log_freq (int): Specifies how many steps to run before logging the losses,
            e.g. `log_freq=10` logs every 10 steps of training.
            Pass `log_freq<=0` in case you don't want to log.
        measure_performance_freq (int): Specifies how many steps to run before
            measuring the performance, e.g. `measure_performance_freq=10`
            measures performance every 10 steps of training.
            Pass `measure_performance_freq<=0` in case you don't want to
            measure performance.

    """

    def _measure_on_validation_set():
        # Point the context at the validation data and measure under the
        # evaluation summary writer.
        with self._eval_summary_writer.as_default():
            self._context.dataset = validation_set
            self._measure_performance()

    # Expose both datasets through the context.
    self._context.training_set = training_set
    self._context.validation_set = validation_set

    current_epoch = self._current_epoch()
    self._update_global_batch_size(training_set, self._loss)

    # Baseline measurement before any training step.
    _measure_on_validation_set()

    # The training set must be batched with the global batch size; the
    # incomplete last batch is dropped only when a strategy is in use.
    drop_remainder = tf.distribute.has_strategy()
    training_set = wrap(
        training_set.unbatch().batch(
            self._global_batch_size, drop_remainder=drop_remainder
        )
    )

    with self._train_summary_writer.as_default():
        self._on_train_start()

        for _ in tf.range(current_epoch, self._epochs):
            per_replica_batches = (
                self._distribute_strategy.experimental_distribute_dataset(
                    training_set
                )
            )

            self._on_epoch_start()

            for example in per_replica_batches:
                self._context.current_batch = self.local_example(example, (1, 1))

                self._on_batch_start()

                loss = self._train_step(example)
                self._global_step.assign_add(1)

                # Print the loss every `log_freq` steps (never if <= 0).
                should_log = log_freq > 0 and tf.equal(
                    tf.math.mod(self._global_step, log_freq), 0
                )
                if should_log:
                    tf.print(f"[{self._global_step.numpy()}] loss: {loss}")

                # This could also be moved to on_batch_end.
                self._measure_performance_if_needed(
                    example, measure_performance_freq
                )

                self._on_batch_end()

            self._on_epoch_end()

            _measure_on_validation_set()

        # Final callback.
        self._on_train_end()
def __init__(self, log_dir: str, predict_images: tf.data.Dataset, num_images: int = 5):
    """Set up the sample images and the summary writer.

    Args:
        log_dir: Base log directory; summaries go to its ``images``
            sub-directory.
        predict_images: Batched dataset to draw the sample images from.
        num_images: How many images to take from the shuffled dataset.
    """
    super().__init__()
    images_dir = os.path.join(log_dir, "images")
    # Unbatch so that single images, not whole batches, are shuffled and taken.
    shuffled_images = predict_images.unbatch().shuffle(5000)

    self.num_images = num_images
    self.predict_images = shuffled_images.take(num_images)
    self.writer = tf.summary.create_file_writer(images_dir)
def size_of_dataset(dataset: tf.data.Dataset) -> int:
    """Count the individual examples contained in a batched dataset.

    The dataset is unbatched and iterated once. Elements are counted as they
    stream by instead of being collected into a list first, so memory use
    does not grow with the dataset size.

    Args:
        dataset: A finite, batched dataset (``unbatch`` needs a leading
            batch dimension on every component).

    Returns:
        The number of examples after unbatching.
    """
    # Only the count is needed: no NumPy conversion, no materialised list.
    return sum(1 for _ in dataset.unbatch())
def dataset_extract_images(D: tf.data.Dataset) -> tf.data.Dataset:
    """Extract image/label pairs and apply the image transform.

    ``transform_image`` is applied to batches of size one, after which the
    dataset is unbatched again, so the result yields individual elements.

    Args:
        D: Source dataset whose elements ``extract_image_and_label`` accepts.

    Returns:
        The transformed, unbatched dataset.
    """
    extracted = D.map(extract_image_and_label, num_parallel_calls=4)
    transformed = extracted.batch(1).map(transform_image, num_parallel_calls=4)
    return transformed.unbatch()
def call(
    self,
    dataset: tf.data.Dataset,
    log_freq: int = 10,
    measure_performance_freq: int = 10,
):
    r"""
    Perform the adversarial training.

    The dataset is re-batched to the global batch size (dropping the
    incomplete remainder). Its first batch is stored in the context as the
    generator and encoder inputs.

    Args:
        dataset (:py:class:`tf.data.Dataset`): The adversarial training dataset.
        log_freq (int): Specifies how many steps to run before logging the losses,
            e.g. `log_freq=10` logs every 10 steps of training.
            Pass `log_freq<=0` in case you don't want to log.
        measure_performance_freq (int): Specifies how many steps to run before
            measuring the performance, e.g. `measure_performance_freq=10`
            measures performance every 10 steps of training.
            Pass `measure_performance_freq<=0` in case you don't want to
            measure performance.

    """
    current_epoch = self._current_epoch()

    losses = [
        self._discriminator_loss,
        self._generator_loss,
        self._encoder_loss,
    ]
    self._update_global_batch_size(dataset, losses)

    rebatched = dataset.unbatch().batch(
        self._global_batch_size, drop_remainder=True
    )
    dataset = wrap(rebatched)

    # The first batch provides the reference inputs kept in the context.
    samples = next(iter(dataset.take(1)))
    self._context.generator_inputs = samples[1]
    self._context.encoder_inputs = samples[0][0]

    with self._train_summary_writer.as_default():
        self._on_train_start()

        for _ in tf.range(current_epoch, self._epochs):
            per_replica_batches = (
                self._distribute_strategy.experimental_distribute_dataset(dataset)
            )

            self._on_epoch_start()

            for example in per_replica_batches:
                (
                    d_loss,
                    g_loss,
                    e_loss,
                    fake,
                    generator_of_encoder,
                ) = self._train_step(example)

                self._global_step.assign_add(1)

                # Make the latest generated samples available via the context.
                self._context.fake_samples = fake
                self._context.generator_of_encoder = generator_of_encoder

                # Print the losses every `log_freq` steps (never if <= 0).
                should_log = log_freq > 0 and tf.equal(
                    tf.math.mod(self._global_step, log_freq), 0
                )
                if should_log:
                    tf.print(
                        f"[{self._global_step.numpy()}] g_loss: {g_loss} - "
                        f"d_loss: {d_loss} - e_loss: {e_loss}")

                self._measure_performance_if_needed(
                    example, measure_performance_freq
                )

                self._on_batch_end()

            self._on_epoch_end()

        self._on_train_end()
def transform_dataset(self, ds_input: tf.data.Dataset) -> tf.data.Dataset:
    """Return ``ds_input`` with every batch split into its single elements."""
    unbatched = ds_input.unbatch()
    return unbatched
def test_model(model: tf.keras.Model, test_ds: tf.data.Dataset, select_im=1):
    """
    Runs the model on one image of the dataset pipeline, prints ground truth
    against prediction and plots the image with the bbox ellipses
    (prediction in blue, ground truth in red).

    :param model: Model object
    :param test_ds: Evaluation dataset object
    :param select_im: Selected image number (1-based)
    """

    def _add_ellipse(axis, center, params, color):
        # Un-filled ellipse; the third parameter is mapped to degrees.
        outline = Ellipse(xy=(center * 512),
                          width=params[0] * 256,
                          height=params[1] * 256,
                          angle=((params[2] * 2 * np.pi - np.pi) * 180 / np.pi),
                          edgecolor=color,
                          lw=2,
                          facecolor='none')
        outline.set_alpha(0.8)
        axis.add_artist(outline)

    wanted_idx = select_im - 1
    examples = test_ds.unbatch().take(select_im).as_numpy_iterator()

    for idx, (x, y) in enumerate(examples):
        # Only the selected image is processed
        if idx != wanted_idx:
            continue

        # Ground-truth values
        is_defect, bbox_param, bbox_center, class_type = y[0], y[1], y[2], y[3]

        # Model expects (batch_size, 224, 224, 3) shaped input,
        # so a leading batch axis is added
        img_arr = np.array(x)[np.newaxis, :, :, :]

        y_pred = model.predict(img_arr)

        # Prediction results
        is_defect_pred = y_pred[0]
        bbox_param_pred = np.squeeze(y_pred[1])
        bbox_center_pred = np.squeeze(y_pred[2])
        class_type_pred = y_pred[3]

        # Convert to unsigned 8-bit integers for display, then take the
        # single image of the batch and resize it to 512x512
        img_arr = (img_arr * 255).astype(np.uint8)
        img_arr = Image.fromarray(img_arr[0]).resize((512, 512))

        print(
            f'Is defected: \tGT vs Prediction | {is_defect} - {is_defect_pred.squeeze()}'
        )
        print(
            f'Class type: \tGT vs Prediction | {np.argmax(class_type)} - {np.argmax(class_type_pred.squeeze())}'
        )
        print(
            f'Bbox center: \tGT vs Prediction | {bbox_center} - {bbox_center_pred}'
        )
        print(
            f'Bbox params: \tGT vs Prediction | {bbox_param} - {bbox_param_pred}'
        )
        print(
            f'Bbox center \tL2 Distance | {np.mean((bbox_center - bbox_center_pred) ** 2)}'
        )

        fig, ax = plt.subplots(figsize=(12, 12))
        plt.imshow(img_arr, cmap='gray')

        # Ellipses are drawn only when the ground truth marks a defect
        if is_defect > 0.5:
            _add_ellipse(ax, bbox_center_pred, bbox_param_pred, 'b')
            _add_ellipse(ax, bbox_center, bbox_param, 'r')

        plt.show()