def init_object_detector_graph(self, input_h, input_w, init_weights):
    """
    Build the object detector graph and load the initial weights

    Creates the input placeholders, the network output, the loss, a
    piecewise constant learning rate schedule and a momentum optimizer
    with clipped gradients. Afterwards the global and local variable
    initializers are run in self.sess and self.load_weights is called.

    Parameters
    ----------
    input_h: int
        Height of the input images
    input_w: int
        Width of the input images
    init_weights: Dictionary
        Weights handed to self.load_weights
    """
    to_numpy = _utils.convert_shared_float_array_to_numpy

    # Set flag for training or val
    self.is_train = _tf.placeholder(_tf.bool)

    # Create placeholders for image and labels
    images_shape = [self.batch_size, input_h, input_w, 3]
    labels_shape = [
        self.batch_size,
        self.grid_shape[0],
        self.grid_shape[1],
        self.num_anchors,
        self.num_classes + 5,
    ]
    self.images = _tf.placeholder(_tf.float32, images_shape, name="images")
    self.labels = _tf.placeholder(_tf.float32, labels_shape, name="labels")

    self.tf_model = self.tiny_yolo(inputs=self.images,
                                   output_size=self.output_size)
    self.global_step = _tf.Variable(0, trainable=False, name="global_step")
    self.loss = self.loss_layer(self.tf_model, self.labels)

    # Learning rate schedule: one rate per entry of init_steps, each ten
    # times smaller than the previous one.
    self.base_lr = to_numpy(self.config["learning_rate"])
    self.num_iterations = int(to_numpy(self.config["num_iterations"]))
    total = self.num_iterations
    self.init_steps = [total // 2, 3 * total // 4, total]
    self.lrs = [
        _np.float32(self.base_lr * 10**(-power))
        for power in range(len(self.init_steps))
    ]
    self.steps_tf = self.init_steps[:-1]
    self.lr = _tf.train.piecewise_constant(self.global_step, self.steps_tf,
                                           self.lrs)

    # TODO: Evaluate method to update lr in set_learning_rate()
    self.opt = _tf.train.MomentumOptimizer(self.lr, momentum=0.9)

    self.clip_value = to_numpy(self.config.get("gradient_clipping"))
    clipped_gradients = []
    for gradient, variable in self.opt.compute_gradients(self.loss):
        clipped = self.ClipIfNotNone(gradient, self.clip_value)
        clipped_gradients.append((clipped, variable))
    self.train_op = self.opt.apply_gradients(clipped_gradients,
                                             global_step=self.global_step)

    for initializer in (_tf.global_variables_initializer,
                        _tf.local_variables_initializer):
        self.sess.run(initializer())
    self.load_weights(init_weights)
def __init__(self, input_h, input_w, batch_size, output_size, out_h, out_w,
             init_weights, config):
    """
    Set up the object detector model inside its own TensorFlow graph

    Parameters
    ----------
    input_h: int
        Height of the input images
    input_w: int
        Width of the input images
    batch_size: int
        Number of images per batch
    output_size: int
        Passed on to the network definition as output_size
    out_h: int
        First entry of the grid shape
    out_w: int
        Second entry of the grid shape
    init_weights: Dictionary
        Initial weights; every value is converted to numpy in place
    config: Dictionary
        Training configuration; 'num_classes' is read here
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Converting incoming weights from shared_float_array to numpy
    to_numpy = _utils.convert_shared_float_array_to_numpy
    for name in init_weights.keys():
        init_weights[name] = to_numpy(init_weights[name])

    self.od_graph = _tf.Graph()
    self.config = config
    self.batch_size = batch_size
    self.grid_shape = [out_h, out_w]
    self.num_classes = int(to_numpy(config['num_classes']))
    self.anchors = [
        (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
        (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
        (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
        (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
        (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
    ]
    self.num_anchors = len(self.anchors)
    self.output_size = output_size

    # The session is bound to the private graph; the graph itself is
    # populated while it is the default graph.
    self.sess = _tf.Session(graph=self.od_graph)
    with self.od_graph.as_default():
        self.init_object_detector_graph(input_h, input_w, init_weights)
def __init__(self, config, net_params):
    """
    Set up the style transfer model on the (reset) default graph

    Parameters
    ----------
    config: Dictionary
        Configuration values; converted to numpy in place. 'st_training'
        decides whether the training graph is defined.
    net_params: Dictionary
        Network parameters; converted to numpy in place and used to
        define the TensorFlow variables.
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    _tf.reset_default_graph()

    # Convert both incoming dictionaries from shared_float_array to numpy
    to_numpy = _utils.convert_shared_float_array_to_numpy
    for name in net_params.keys():
        net_params[name] = to_numpy(net_params[name])
    for name in config.keys():
        config[name] = to_numpy(config[name])

    self._batch_size = 1
    self._finetune_all_params = True
    self._define_training_graph = bool(config['st_training'])
    self._tf_variables = define_tensorflow_variables(net_params)

    # TODO: take care of batch size
    image_shape = [None, 256, 256, 3]
    self.tf_input = _tf.placeholder(dtype=_tf.float32, shape=image_shape)
    self.tf_style = _tf.placeholder(dtype=_tf.float32, shape=image_shape)
    # NOTE(review): reads self.batch_size although only self._batch_size
    # is assigned above - presumably a property defined on the class.
    self.tf_index = _tf.placeholder(dtype=_tf.int64,
                                    shape=[self.batch_size])

    self.__define_graph()

    self.sess = _tf.Session()
    self.sess.run(_tf.global_variables_initializer())
def get_augmented_data(self, images, annotations):
    """
    Run the resize graph on a batch of images and annotations

    Parameters
    ----------
    images: sequence
        Input images; each one is converted to numpy before being fed
    annotations: sequence
        Annotations per image. Ignored when self.resize_only is set, in
        which case zero-filled placeholder annotations are fed instead.

    Returns
    -------
    out: tuple
        (images as one contiguous float32 array,
         list of contiguous float32 annotation arrays)
    """
    to_numpy = _utils.convert_shared_float_array_to_numpy
    with tf.Session(graph=self.graph) as session:
        # Only the first len(images) ops are needed for this batch
        batch_ops = self.resize_op_batch[0 : len(images)]

        inputs = {}
        for idx in range(len(images)):
            inputs[self.img_tf[idx]] = to_numpy(images[idx])
            if self.resize_only:
                inputs[self.ann_tf[idx]] = self.batch_size * [np.zeros(6)]
            else:
                inputs[self.ann_tf[idx]] = to_numpy(annotations[idx])

        results = session.run(batch_ops, feed_dict=inputs)

        # Every result holds the image first and its annotations second
        image_list = [result[0] for result in results]
        processed_annotations = [
            np.ascontiguousarray(result[1], dtype=np.float32)
            for result in results
        ]
        processed_images = np.ascontiguousarray(
            np.array(image_list, dtype=np.float32), dtype=np.float32
        )
        return (processed_images, processed_annotations)
def get_augmented_data(self, images, annotations, random_seed):
    """
    Run the augmentation graph on a batch of images and annotations

    Parameters
    ----------
    images: sequence
        Input images; each one is converted to numpy before being fed
    annotations: sequence
        Annotations for each image, converted the same way
    random_seed: int
        Seed of the numpy generator that produces the alpha values and
        the seeds fed into the graph

    Returns
    -------
    out: tuple
        (images as one contiguous float32 array,
         list of contiguous float32 annotation arrays)
    """
    tf = _lazy_import_tensorflow()
    to_numpy = _utils.convert_shared_float_array_to_numpy
    with tf.Session(graph=self.graph) as session:
        batch_ops = self.resize_op_batch[0:len(images)]

        # Populate the feed with images and annotations
        inputs = {}
        for idx in range(len(images)):
            inputs[self.img_tf[idx]] = to_numpy(images[idx])
            inputs[self.ann_tf[idx]] = to_numpy(annotations[idx])

        # Populate the feed with random seed and random alpha values, used
        # to sample image perturbations. We don't use TensorFlow's built-in
        # support for random number generation, since we want to effectively
        # reset the seed for each session (batch).
        rng = np.random.RandomState(seed=random_seed)
        inputs[self.alpha_tf] = rng.rand(*self.alpha_tf.shape)
        inputs[self.random_seed_tf] = rng.randint(0, 2**32,
                                                  size=self.batch_size)

        results = session.run(batch_ops, feed_dict=inputs)

        # Every result holds the image first and its annotations second
        image_list = [result[0] for result in results]
        processed_annotations = [
            np.ascontiguousarray(result[1], dtype=np.float32)
            for result in results
        ]
        processed_images = np.ascontiguousarray(
            np.array(image_list, dtype=np.float32), dtype=np.float32)
        return (processed_images, processed_annotations)
def __init__(self, input_h, input_w, batch_size, output_size, init_weights,
             config, is_train=True):
    """
    Build the object detector on the default graph and load its weights

    Parameters
    ----------
    input_h: int
        Height of the input images
    input_w: int
        Width of the input images
    batch_size: int
        Number of images per batch
    output_size: int
        Passed on to the network definition as output_size
    init_weights: Dictionary
        Initial weights; every value is converted to numpy in place
    config: Dictionary
        Training configuration ('num_classes', 'learning_rate',
        'num_iterations' and 'gradient_clipping' are read here)
    is_train: bool
        Flag for training or val
    """
    to_numpy = _utils.convert_shared_float_array_to_numpy

    # reset tensorflow graph when a new model is created
    _tf.reset_default_graph()

    # Converting incoming weights from shared_float_array to numpy
    for name in init_weights.keys():
        init_weights[name] = to_numpy(init_weights[name])

    self.config = config
    self.batch_size = batch_size
    self.grid_shape = [13, 13]
    self.num_classes = int(to_numpy(config['num_classes']))
    self.anchors = [
        (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
        (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
        (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
        (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
        (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
    ]
    self.num_anchors = len(self.anchors)
    self.output_size = output_size
    self.is_train = is_train  # Set flag for training or val

    # Create placeholders for image and labels
    images_shape = [self.batch_size, input_h, input_w, 3]
    labels_shape = [
        self.batch_size,
        self.grid_shape[0],
        self.grid_shape[1],
        self.num_anchors,
        self.num_classes + 5,
    ]
    self.images = _tf.placeholder(_tf.float32, images_shape, name='images')
    self.labels = _tf.placeholder(_tf.float32, labels_shape, name='labels')

    self.init_weights = init_weights
    self.tf_model = self.tiny_yolo(inputs=self.images,
                                   output_size=self.output_size)
    self.global_step = _tf.Variable(0, trainable=False, name="global_step")
    self.loss = self.loss_layer(self.tf_model, self.labels)

    # Learning rate schedule: one rate per entry of init_steps, each ten
    # times smaller than the previous one.
    self.base_lr = to_numpy(config['learning_rate'])
    self.num_iterations = int(to_numpy(config['num_iterations']))
    total = self.num_iterations
    self.init_steps = [total // 2, 3 * total // 4, total]
    self.lrs = [
        _np.float32(self.base_lr * 10 ** (-power))
        for power in range(len(self.init_steps))
    ]
    self.steps_tf = self.init_steps[:-1]
    self.lr = _tf.train.piecewise_constant(self.global_step, self.steps_tf,
                                           self.lrs)

    # TODO: Evaluate method to update lr in set_learning_rate()
    self.opt = _tf.train.MomentumOptimizer(self.lr, momentum=0.9)

    self.clip_value = to_numpy(self.config.get('gradient_clipping'))
    clipped_gradients = []
    for gradient, variable in self.opt.compute_gradients(self.loss):
        clipped = self.ClipIfNotNone(gradient, self.clip_value)
        clipped_gradients.append((clipped, variable))
    self.train_op = self.opt.apply_gradients(clipped_gradients,
                                             global_step=self.global_step)

    self.sess = _tf.Session()
    for initializer in (_tf.global_variables_initializer,
                        _tf.local_variables_initializer):
        self.sess.run(initializer())
    self.load_weights(self.init_weights)
def apply_bounding_box_transformation(images, annotations, transformations,
                                      clip_to_shape=None):
    """
    Apply per-image transformation matrices to bounding box annotations

    Each annotation row is laid out as
    [identifier, x, y, width, height, confidence] with the box given
    relative to the image size. The box corners are converted to pixels,
    multiplied by the transformation matrix of that image, re-ordered so
    that the lower corner comes first, optionally clipped, and converted
    back to relative [x, y, width, height].

    Parameters
    ----------
    images: sequence
        Input images; only their shapes (height, width, ...) are used
    annotations: sequence
        One [n, 6] annotation array per image
    transformations: sequence
        One matrix per image, applied to homogeneous (y, x, 1) points
    clip_to_shape: tuple or None
        (height, width) of the output image. Boxes are clipped to it and
        normalized by it. When None, no clipping happens and the boxes
        are normalized by the input image size.

    Returns
    -------
    aug_anns: list
        One contiguous float32 [n, 6] array per image
    """
    aug_anns = []
    for img_idx in range(len(annotations)):
        image = _utils.convert_shared_float_array_to_numpy(images[img_idx])
        height = float(image.shape[0])
        # Width is the second axis; reading axis 0 here was only correct
        # for square images.
        width = float(image.shape[1])

        annotation = _utils.convert_shared_float_array_to_numpy(
            annotations[img_idx])
        identifier = np.expand_dims(annotation[:, 0], axis=1)
        confidence = np.expand_dims(annotation[:, 5], axis=1)

        # Relative (x, y, w, h) -> pixel (y_min, x_min, y_max, x_max)
        box = np.zeros(annotation[:, 1:5].shape)
        box[:, 0] = annotation[:, 2] * height
        box[:, 1] = annotation[:, 1] * width
        box[:, 2] = (annotation[:, 4] + annotation[:, 2]) * height
        box[:, 3] = (annotation[:, 3] + annotation[:, 1]) * width

        # The bounding box is [n, 4] reshaped and ones added to multiply to
        # transformation matrix
        points = np.concatenate(
            [box.reshape(-1, 2),
             np.ones((box.shape[0] * 2, 1), dtype=np.float32)],
            axis=1)

        # Transform
        points = np.dot(points, np.transpose(transformations[img_idx]))

        # Reverse shape
        bbox_out = points[:, :2].reshape(-1, 4)

        # Make points correctly ordered (lower < upper)
        for lower, upper in ((0, 2), (1, 3)):
            swap = bbox_out[:, lower] > bbox_out[:, upper]
            bbox_out[swap, lower], bbox_out[swap, upper] = (
                bbox_out[swap, upper], bbox_out[swap, lower])

        if clip_to_shape is not None:
            out_height = float(clip_to_shape[0])
            out_width = float(clip_to_shape[1])
            bbox_out[:, 0::2] = np.clip(bbox_out[:, 0::2], 0, out_height)
            bbox_out[:, 1::2] = np.clip(bbox_out[:, 1::2], 0, out_width)
        else:
            # NOTE(review): without an explicit output shape the input
            # image size is used for normalization - this assumes the
            # transformation keeps the image size; confirm with callers.
            out_height, out_width = height, width

        # Pixel (y_min, x_min, y_max, x_max) -> relative (x, y, w, h).
        # x and width are scaled by the output width, y and height by the
        # output height, matching the axes used for clipping above.
        bbox = np.zeros(bbox_out.shape)
        bbox[:, 0] = bbox_out[:, 1] / out_width
        bbox[:, 1] = bbox_out[:, 0] / out_height
        bbox[:, 2] = (bbox_out[:, 3] - bbox_out[:, 1]) / out_width
        bbox[:, 3] = (bbox_out[:, 2] - bbox_out[:, 0]) / out_height

        an = np.hstack((identifier, bbox, confidence))
        aug_anns.append(np.ascontiguousarray(an, dtype=np.float32))
    return aug_anns
def get_resized_images(images, output_shape):
    """
    Build TensorFlow ops that resize every image to output_shape

    Each image is scaled bilinearly by output_shape / image shape, padded
    through pad_to_ensure_size, sliced to
    (output_shape[0], output_shape[1], 3) and clipped to [0, 1].

    Parameters
    ----------
    images: sequence
        Input images; each one is converted to numpy first
    output_shape: tuple
        Target (height, width)

    Returns
    -------
    resized_images: list
        One TensorFlow tensor per input image
    """
    target_h, target_w = output_shape[0], output_shape[1]
    resized_images = []
    for idx in range(len(images)):
        source = _utils.convert_shared_float_array_to_numpy(images[idx])
        height, width, _ = tf.unstack(tf.shape(source))

        scale_h = tf.constant(target_h, dtype=tf.float32) / tf.to_float(height)
        scale_w = tf.constant(target_w, dtype=tf.float32) / tf.to_float(width)
        new_height = tf.to_int32(tf.to_float(height) * scale_h)
        new_width = tf.to_int32(tf.to_float(width) * scale_w)

        # resize_bilinear works on batches, so add and drop a batch axis
        batched = tf.expand_dims(source, 0)
        scaled = tf.image.resize_bilinear(batched, [new_height, new_width])
        image_scaled = tf.squeeze(scaled, [0])

        pad_image, pad_offset = pad_to_ensure_size(
            image_scaled, target_h, target_w, random=False)

        new_height = tf.maximum(target_h, new_height)
        new_width = tf.maximum(target_w, new_width)

        offset_h = tf.random_uniform(
            [], minval=0, maxval=new_height - target_h + 1, dtype=tf.int32)
        offset_w = tf.random_uniform(
            [], minval=0, maxval=new_width - target_w + 1, dtype=tf.int32)

        cropped = array_ops.slice(pad_image, [offset_h, offset_w, 0],
                                  [target_h, target_w, 3])
        resized_images.append(tf.clip_by_value(cropped, 0, 1))
    return resized_images
def train(self, feed_dict):
    """
    Run one training step on a batch of data (inputs and labels)

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input', 'labels' and 'num_samples'. Every value is
        converted to numpy in place.

    Returns
    -------
    result: Dictionary
        'loss' and 'output' of the batch as numpy arrays
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    sample_count = int(float(feed_dict["num_samples"]))

    # convert to one hot
    class_ids = feed_dict["labels"].astype("int32").T
    one_hot_labels = _np.zeros((sample_count, self.num_classes))
    one_hot_labels[_np.arange(sample_count), class_ids] = 1

    fetches = [self.optimizer, self.cost, self.predictions]
    session_feed = {
        self.input: feed_dict['input'],
        self.one_hot_labels: one_hot_labels,
    }
    _, batch_loss, batch_output = self.sess.run(fetches,
                                                feed_dict=session_feed)

    return {
        'loss': _np.array(batch_loss),
        'output': _np.array(batch_output),
    }
def __init__(self, net_params, batch_size, num_features, num_classes,
             prediction_window, seq_len, seed):
    """
    Set up the activity classifier inside its own TensorFlow graph

    Parameters
    ----------
    net_params: Dictionary
        Network parameters; every value is converted to numpy in place
    batch_size: int
        Stored as self.batch_size
    num_features: int
        Passed on to init_activity_classifier_graph
    num_classes: int
        Stored as self.num_classes
    prediction_window: int
        Passed on to init_activity_classifier_graph
    seq_len: int
        Stored as self.seq_len
    seed: int
        Passed on to init_activity_classifier_graph
    """
    _utils.suppress_tensorflow_warnings()
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Convert incoming parameters from shared_float_array to numpy
    to_numpy = _utils.convert_shared_float_array_to_numpy
    for name in net_params.keys():
        net_params[name] = to_numpy(net_params[name])

    self.ac_graph = _tf.Graph()
    self.num_classes = num_classes
    self.batch_size = batch_size
    self.seq_len = seq_len

    # The session is bound to the private graph; the graph itself is
    # populated while it is the default graph.
    self.sess = _tf.Session(graph=self.ac_graph)
    with self.ac_graph.as_default():
        self.init_activity_classifier_graph(net_params, num_features,
                                            prediction_window, seed)
def predict(self, feed_dict):
    """
    Run the model on a batch of data

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input' and optionally 'labels' together with 'weights'.
        Every value is converted to numpy in place.

    Returns
    -------
    result: Dictionary
        'output' always; 'loss' as well when labels were provided
    """
    has_labels = "labels" in feed_dict

    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    session_feed = {self.input: feed_dict["input"]}

    if not has_labels:
        # Fetching a one-element list: the result keeps that list axis
        pred_probs = self.sess.run([self.predictions],
                                   feed_dict=session_feed)
        return {"output": _np.array(pred_probs)}

    session_feed[self.labels] = feed_dict["labels"]
    session_feed[self.weights] = feed_dict["weights"]
    pred_probs, loss = self.sess.run([self.predictions, self.cost],
                                     feed_dict=session_feed)
    return {"loss": _np.array(loss), "output": _np.array(pred_probs)}
def predict(self, feed_dict):
    """
    Run the model on a batch of data, one-hot encoding labels if present

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input', 'num_samples' and optionally 'labels'. Every value
        is converted to numpy in place.

    Returns
    -------
    result: Dictionary
        'output' always; 'loss' as well when labels were provided
    """
    has_labels = "labels" in feed_dict

    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    sample_count = int(float(feed_dict["num_samples"]))
    one_hot_labels = _np.zeros((sample_count, self.num_classes))
    session_feed = {self.input: feed_dict["input"]}

    if not has_labels:
        # Fetching a one-element list: the result keeps that list axis
        pred_probs = self.sess.run([self.predictions],
                                   feed_dict=session_feed)
        return {'output': _np.array(pred_probs)}

    # convert to one hot
    class_ids = feed_dict["labels"].astype("int32").T
    one_hot_labels[_np.arange(sample_count), class_ids] = 1
    session_feed[self.one_hot_labels] = one_hot_labels

    pred_probs, loss = self.sess.run([self.predictions, self.cost],
                                     feed_dict=session_feed)
    return {'loss': _np.array(loss), 'output': _np.array(pred_probs)}
def train(self, feed_dict):
    """
    Run session for training with new batch of data (inputs, labels and
    weights)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data with the corresponding labels and weights
        (keys 'input', 'labels' and 'weights'). Every value is converted
        to numpy, has axis 1 squeezed out and is stored back in place.
        This is currently passed from the ac_data_iterator.cpp file when
        a new batch of data is sent.

    Returns
    -------
    result: Dictionary
        Loss per batch and probabilities
    """
    for name in feed_dict.keys():
        batch = _utils.convert_shared_float_array_to_numpy(feed_dict[name])
        batch = _np.squeeze(batch, axis=1)
        shape_3d = (batch.shape[0], batch.shape[1], batch.shape[2])
        feed_dict[name] = _np.reshape(batch, shape_3d)

    session_feed = {
        self.data: feed_dict['input'],
        self.target: feed_dict['labels'],
        self.weight: feed_dict['weights'],
        self.is_training: True,
    }
    fetches = [self.train_op, self.loss_per_seq, self.probs]
    _, loss, probs = self.sess.run(fetches, feed_dict=session_feed)

    # Flatten the last two axes of the probabilities
    prob = _np.array(probs)
    flat_shape = (prob.shape[0], prob.shape[1] * prob.shape[2])
    probabilities = _np.reshape(prob, flat_shape)

    return {'loss': _np.array(loss), 'output': probabilities}
def predict(self, feed_dict):
    """
    Run session for predicting with new batch of data (input)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data under the key 'input'. Every value is
        converted to numpy in place.

    Returns
    -------
    result: Dictionary
        'output' holds the network output for the batch as a numpy array
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    session_feed = {
        self.images: feed_dict["input"],
        self.is_train: False,
    }
    # TODO: Include self.labels: feed_dict['label'] to handle labels from validation set
    fetched = self.sess.run([self.tf_model], feed_dict=session_feed)

    return {"output": _np.array(fetched[0])}
def train(self, feed_dict):
    """
    Run session for training with new batch of data (input and label)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data with the corresponding labels (keys 'input'
        and 'labels'). Every value is converted to numpy in place and the
        labels are reshaped to the label placeholder layout. This is
        currently passed from the object_detector.py file when a new
        batch of data is sent.

    Returns
    -------
    result: Dictionary
        'loss' holds the loss of the batch as a one-element numpy array
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    grid_h, grid_w = self.grid_shape[0], self.grid_shape[1]
    feed_dict['labels'] = feed_dict['labels'].reshape(
        self.batch_size, grid_h, grid_w, self.num_anchors,
        self.num_classes + 5)

    session_feed = {
        self.images: feed_dict['input'],
        self.labels: feed_dict['labels'],
    }
    _, loss_batch = self.sess.run([self.train_op, self.loss],
                                  feed_dict=session_feed)

    return {'loss': _np.array([loss_batch])}
def predict(self, feed_dict):
    """
    Stylize a batch of images

    The input placeholder is re-created with the spatial shape of the
    incoming batch and the inference graph is defined again before the
    session is run.

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input' and 'index'. Every value is converted to numpy in
        place.

    Returns
    -------
    result: Dictionary
        'output' holds the stylized images cropped to the input height
        and width
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    content = feed_dict['input']

    # Keep the batch axis flexible and take the remaining axes from the data
    placeholder_shape = [None] + list(content.shape)[1:]
    self.tf_input = _tf.placeholder(dtype=_tf.float32,
                                    shape=placeholder_shape)
    self._define_training_graph = False
    self.__define_graph()

    session_feed = {
        self.tf_input: feed_dict['input'],
        self.tf_index: feed_dict['index'],
    }
    stylized_image = self.sess.run([self.output], feed_dict=session_feed)
    stylized_raw = _np.array(stylized_image)

    # Crop to remove added padding
    expected_height = feed_dict['input'].shape[1]
    expected_width = feed_dict['input'].shape[2]
    stylized_cropped = stylized_raw[:, :, 0:expected_height,
                                    0:expected_width, :][0]

    return {"output": _np.array(stylized_cropped)}
def predict(self, feed_dict):
    """
    Run session for predicting with new batch of validation data (inputs,
    labels and weights) as well as test data (inputs)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data, optionally with the corresponding labels and
        weights. Every value is converted to numpy, has axis 1 squeezed
        out and is stored back in place. This is currently passed from
        the ac_data_iterator.cpp file when a new batch of data is sent.

    Returns
    -------
    result: Dictionary
        Loss per batch and probabilities (in case of validation data)
        Probabilities (in case only inputs are provided)
    """
    for name in feed_dict.keys():
        batch = _utils.convert_shared_float_array_to_numpy(feed_dict[name])
        batch = _np.squeeze(batch, axis=1)
        shape_3d = (batch.shape[0], batch.shape[1], batch.shape[2])
        feed_dict[name] = _np.reshape(batch, shape_3d)

    # A single entry means that only the inputs were provided
    inputs_only = len(feed_dict.keys()) == 1

    if inputs_only:
        session_feed = {
            self.data: feed_dict["input"],
            self.is_training: False,
        }
        probs = self.sess.run(self.probs, feed_dict=session_feed)
    else:
        session_feed = {
            self.data: feed_dict["input"],
            self.target: feed_dict["labels"],
            self.weight: feed_dict["weights"],
            self.is_training: False,
        }
        loss, probs = self.sess.run([self.loss_per_seq, self.probs],
                                    feed_dict=session_feed)

    # Flatten the last two axes of the probabilities
    prob = _np.array(probs)
    flat_shape = (prob.shape[0], prob.shape[1] * prob.shape[2])
    probabilities = _np.reshape(prob, flat_shape)

    if inputs_only:
        return {"output": probabilities}
    return {"loss": _np.array(loss), "output": probabilities}
def __init__(self, config, net_params):
    """
    Set up the style transfer model inside its own TensorFlow graph

    Parameters
    ----------
    config: Dictionary
        Configuration values; converted to numpy in place. 'st_training'
        decides whether the training graph is defined.
    net_params: Dictionary
        Network parameters; converted to numpy in place and handed to
        init_style_transfer_graph.
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Convert both incoming dictionaries from shared_float_array to numpy
    to_numpy = _utils.convert_shared_float_array_to_numpy
    for name in net_params.keys():
        net_params[name] = to_numpy(net_params[name])
    for name in config.keys():
        config[name] = to_numpy(config[name])

    self.st_graph = _tf.Graph()
    self._batch_size = 1
    self._finetune_all_params = True
    self._define_training_graph = bool(config['st_training'])

    # The session is bound to the private graph; the graph itself is
    # populated while it is the default graph.
    self.sess = _tf.Session(graph=self.st_graph)
    with self.st_graph.as_default():
        self.init_style_transfer_graph(net_params)
def train(self, feed_dict):
    """
    Run one style transfer training step on a batch of data

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input', 'index' and 'labels' (fed into the style
        placeholder). Every value is converted to numpy in place.

    Returns
    -------
    result: Dictionary
        'loss' holds the loss of the batch as a numpy array
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    session_feed = {
        self.tf_input: feed_dict['input'],
        self.tf_index: feed_dict['index'],
        self.tf_style: feed_dict['labels'],
    }
    fetched = self.sess.run([self.optimizer, self.loss],
                            feed_dict=session_feed)
    loss_value = fetched[1]

    return {"loss": _np.array(loss_value)}
def predict(self, feed_dict):
    """
    Run the Keras model on a new batch of validation data (inputs, labels
    and weights) as well as test data (inputs)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data, optionally with the corresponding labels and
        weights. Every value is converted to numpy, has axis 1 squeezed
        out and is stored back in place. This is currently passed from
        the ac_data_iterator.cpp file when a new batch of data is sent.

    Returns
    -------
    result: Dictionary
        Loss per batch and probabilities (in case of validation data)
        Probabilities (in case only inputs are provided)
    """
    # Convert input
    for name in feed_dict.keys():
        batch = _utils.convert_shared_float_array_to_numpy(feed_dict[name])
        batch = _np.squeeze(batch, axis=1)
        shape_3d = (batch.shape[0], batch.shape[1], batch.shape[2])
        feed_dict[name] = _np.reshape(batch, shape_3d)

    # Generate predictions
    prob = self.model.predict(feed_dict['input'])
    flat_shape = (prob.shape[0], prob.shape[1] * prob.shape[2])
    result = {"output": _np.reshape(prob, flat_shape)}

    if "labels" not in feed_dict.keys():
        return result

    # Validation data: evaluate the model loss, weight it and sum it over
    # axis 1
    keras = _lazy_import_tensorflow().keras
    one_hot = keras.utils.to_categorical(feed_dict['labels'],
                                         num_classes=self.num_classes)
    loss = keras.backend.get_value(
        self.model.loss(y_true=one_hot, y_pred=prob))
    weights = feed_dict["weights"].reshape(loss.shape)
    result["loss"] = _np.sum(loss * weights, axis=1)
    return result
def __init__(self, net_params, batch_size, num_classes):
    """
    Defines the TensorFlow model, loss, optimisation and accuracy inside
    a private graph. Then loads the weights into the model.

    Parameters
    ----------
    net_params: Dictionary
        Network parameters; every value is converted to numpy in place
        and handed to init_drawing_classifier_graph
    batch_size: int
        Stored as self.batch_size
    num_classes: int
        Stored as self.num_classes
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Convert incoming parameters from shared_float_array to numpy
    to_numpy = _utils.convert_shared_float_array_to_numpy
    for name in net_params.keys():
        net_params[name] = to_numpy(net_params[name])

    self.dc_graph = _tf.Graph()
    self.num_classes = num_classes
    self.batch_size = batch_size

    # The session is bound to the private graph; the graph itself is
    # populated while it is the default graph.
    self.sess = _tf.Session(graph=self.dc_graph)
    with self.dc_graph.as_default():
        self.init_drawing_classifier_graph(net_params)
def train(self, feed_dict):
    """
    Train the Keras model on a new batch of data (inputs, labels and
    weights)

    Parameters
    ----------
    feed_dict: Dictionary
        Batch of input data with the corresponding labels and weights
        (keys 'input', 'labels' and 'weights'). Every value is converted
        to numpy, has axis 1 squeezed out and is stored back in place.
        This is currently passed from the ac_data_iterator.cpp file when
        a new batch of data is sent.

    Returns
    -------
    result: Dictionary
        Loss per batch and probabilities
    """
    for key in feed_dict.keys():
        batch = _utils.convert_shared_float_array_to_numpy(feed_dict[key])
        batch = _np.squeeze(batch, axis=1)
        feed_dict[key] = _np.reshape(
            batch, (batch.shape[0], batch.shape[1], batch.shape[2]))

    keras = _lazy_import_tensorflow().keras
    one_hot_labels = keras.utils.to_categorical(
        feed_dict['labels'], num_classes=self.num_classes)

    # One weight per sequence step. The number of steps is inferred from
    # the data instead of being fixed to 20, so models built with another
    # sequence length work as well; for 20 steps the result is unchanged.
    sample_weight = _np.reshape(feed_dict['weights'], (self.batch_size, -1))

    loss = self.model.train_on_batch(
        x=feed_dict['input'],
        y=one_hot_labels,
        sample_weight=sample_weight)

    # Probabilities after the update, with the last two axes flattened
    prob = self.model.predict(feed_dict['input'])
    probabilities = _np.reshape(
        prob, (prob.shape[0], prob.shape[1] * prob.shape[2]))

    return {"loss": _np.array(loss), "output": _np.array(probabilities)}
def train(self, feed_dict):
    """
    Run one training step on a batch of data (inputs, labels and weights)

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input', 'labels' and 'weights'. Every value is converted
        to numpy in place.

    Returns
    -------
    result: Dictionary
        'loss' and 'output' of the batch as numpy arrays
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    fetches = [self.optimizer, self.cost, self.predictions]
    session_feed = {
        self.input: feed_dict["input"],
        self.labels: feed_dict["labels"],
        self.weights: feed_dict["weights"],
    }
    _, batch_loss, batch_output = self.sess.run(fetches,
                                                feed_dict=session_feed)

    return {
        "loss": _np.array(batch_loss),
        "output": _np.array(batch_output),
    }
def predict(self, feed_dict):
    """
    Stylize a batch of images

    Parameters
    ----------
    feed_dict: Dictionary
        Holds 'input' and 'index'. Every value is converted to numpy in
        place.

    Returns
    -------
    result: Dictionary
        'output' holds the stylized images cropped to the input height
        and width
    """
    for name in feed_dict.keys():
        feed_dict[name] = _utils.convert_shared_float_array_to_numpy(
            feed_dict[name])

    with self.st_graph.as_default():
        session_feed = {
            self.tf_input: feed_dict["input"],
            self.tf_index: feed_dict["index"],
        }
        stylized_image = self.sess.run(fetches=[self.output],
                                       feed_dict=session_feed)

    stylized_raw = _np.array(stylized_image)

    # Crop to remove added padding
    expected_height = feed_dict["input"].shape[1]
    expected_width = feed_dict["input"].shape[2]
    stylized_cropped = stylized_raw[:, :, 0:expected_height,
                                    0:expected_width, :][0]

    return {"output": _np.array(stylized_cropped)}
def loss_layer(self, predict, labels):
    """
    Define loss layer

    The loss is the sum of four terms: a squared error on the box
    centers, a Huber loss on the log-space box sizes, a sigmoid cross
    entropy on the confidence and a softmax cross entropy on the classes.
    The weights of the terms (lmb_*) and the rescore flag are read from
    self.config.

    Parameters
    ----------
    predict: TensorFlow Tensor
        The predicted values for the batch of data
    labels: TensorFlow Tensor
        Ground truth labels for the batch of data

    Returns
    -------
    loss: TensorFlow Tensor
        Loss (combination of regression and classification losses)
    """
    _tf = _lazy_import_tensorflow()

    # Anchor/ground-truth IoU thresholds used for the object and
    # no-object masks below
    POS_IOU = 0.7
    NEG_IOU = 0.3

    rescore = int(
        _utils.convert_shared_float_array_to_numpy(
            self.config.get("od_rescore")))
    lmb_coord_xy = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_coord_xy"))
    lmb_coord_wh = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_coord_wh"))
    lmb_obj = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_obj"))
    lmb_noobj = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_noobj"))
    lmb_class = _utils.convert_shared_float_array_to_numpy(
        self.config.get("lmb_class"))

    # Prediction values from model on the images, reshaped to
    # [batch, grid_h, grid_w, anchors, 5 + classes]
    ypred = _tf.reshape(
        predict,
        [-1] + list(self.grid_shape) +
        [self.num_anchors, 5 + self.num_classes],
    )
    raw_xy = ypred[..., 0:2]
    raw_wh = ypred[..., 2:4]
    raw_conf = ypred[..., 4]
    class_scores = ypred[..., 5:]

    tf_anchors = _tf.constant(self.anchors)

    # Ground Truth info derived from ymap/labels
    gt_xy = labels[..., 0:2]
    gt_wh = labels[..., 2:4]
    # Sizes relative to the anchors in log space; 1e-5 keeps log() finite
    gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
    gt_conf = labels[..., 4]
    gt_class = labels[..., 5:]

    # Calculations on predicted confidences
    xy = _tf.sigmoid(raw_xy)
    wh = _tf.exp(raw_wh) * tf_anchors
    # raw_wh * 0.0 makes exp() equal to one, so this is the anchor sizes
    # broadcast to the shape of the predictions
    wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors
    lo = xy - wh / 2
    hi = xy + wh / 2

    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    gt_lo = gt_xy - gt_wh / 2
    gt_hi = gt_xy + gt_wh / 2

    # IoU between the anchors and the ground truth using the sizes only
    # (no center offsets enter this computation)
    c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0)
    c_area = wh_anchors[..., 0] * wh_anchors[..., 1]
    c_inter_area = c_inter[..., 0] * c_inter[..., 1]
    c_iou = c_inter_area / (c_area + gt_area - c_inter_area)

    # IoU between the predicted boxes and the ground truth
    inter = _tf.maximum(_tf.minimum(hi, gt_hi) - _tf.maximum(lo, gt_lo), 0)
    area = wh[..., 0] * wh[..., 1]
    inter_area = inter[..., 0] * inter[..., 1]
    iou = inter_area / (area + gt_area - inter_area)

    active_iou = c_iou

    # Object mask: ground truth confidence is 1 and the anchor either has
    # the highest anchor IoU along axis 3 (the anchor axis of the reshape
    # above) or exceeds POS_IOU
    cond_gt = _tf.cast(_tf.equal(gt_conf, _tf.constant(1.0)),
                       dtype=_tf.float32)
    max_iou = _tf.reduce_max(active_iou, 3, keepdims=True)
    cond_max = _tf.cast(_tf.equal(active_iou, max_iou), dtype=_tf.float32)
    cond_above = c_iou > POS_IOU

    cond_logical_or = _tf.cast(
        _tf.math.logical_or(_tf.cast(cond_max, dtype=_tf.bool),
                            _tf.cast(cond_above, dtype=_tf.bool)),
        dtype=_tf.float32,
    )
    cond_obj = _tf.cast(
        _tf.math.logical_and(
            _tf.cast(cond_gt, dtype=_tf.bool),
            _tf.cast(cond_logical_or, dtype=_tf.bool),
        ),
        dtype=_tf.float32,
    )

    # The masks are treated as constants during backpropagation
    kr_obj_ij = _tf.stop_gradient(cond_obj)

    # No-object mask: anchor IoU below NEG_IOU and not part of the object
    # mask
    cond_below = c_iou < NEG_IOU
    cond_logical_not = _tf.cast(_tf.math.logical_not(
        _tf.cast(cond_obj, dtype=_tf.bool)),
                                dtype=_tf.float32)
    cond_noobj = _tf.cast(
        _tf.math.logical_and(
            _tf.cast(cond_below, dtype=_tf.bool),
            _tf.cast(cond_logical_not, dtype=_tf.bool),
        ),
        dtype=_tf.float32,
    )
    kr_noobj_ij = _tf.stop_gradient(cond_noobj)

    # Number of object entries; the small constant avoids a division by
    # zero when the mask is empty
    count = _tf.reduce_sum(kr_obj_ij)
    eps_count = _tf.math.add(count, _tf.constant(1e-4))

    # NOTE(review): relies on true division (Python 3 or
    # `from __future__ import division`); with integer division this
    # would evaluate to 0 - confirm for the interpreters in use.
    scale_conf = 1 / (self.batch_size * self.grid_shape[0] *
                      self.grid_shape[1])

    # Trailing axis added so the mask broadcasts against the xy/wh pairs
    kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1)
    if rescore:
        # Confidence target is the predicted-box IoU (held constant)
        obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou)
    else:
        obj_gt_conf = kr_obj_ij
    obj_w_obj = kr_obj_ij * lmb_obj
    obj_w_noobj = kr_noobj_ij * lmb_noobj
    obj_w = _tf.math.add(obj_w_obj, obj_w_noobj)

    loss_xy = (lmb_coord_xy *
               _tf.reduce_sum(kr_obj_ij_plus1 * _tf.square(gt_xy - xy)) /
               eps_count)
    loss_wh = _tf.losses.huber_loss(
        labels=gt_raw_wh,
        predictions=raw_wh,
        weights=lmb_coord_wh * kr_obj_ij_plus1,
        delta=1.0,
    )
    loss_conf = scale_conf * _tf.reduce_sum(
        obj_w * _tf.nn.sigmoid_cross_entropy_with_logits(
            labels=obj_gt_conf, logits=raw_conf))
    loss_cls = (lmb_class * _tf.reduce_sum(
        kr_obj_ij * _tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=gt_class, logits=class_scores)) / eps_count)

    losses = [loss_xy, loss_wh, loss_conf, loss_cls]
    loss = _tf.add_n(losses)
    return loss
def __init__(self, net_params, batch_size, num_classes):
    """
    Defines the TensorFlow model, loss, optimisation and accuracy on the
    (reset) default graph. Then loads the MXNET weights into the model.

    Parameters
    ----------
    net_params: Dictionary
        Network parameters keyed by layer name; every value is converted
        to numpy in place
    batch_size: int
        Stored as self.batch_size
    num_classes: int
        Number of output classes
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    for name in net_params.keys():
        net_params[name] = _utils.convert_shared_float_array_to_numpy(
            net_params[name])

    _tf.reset_default_graph()

    self.num_classes = num_classes
    self.batch_size = batch_size

    self.input = _tf.placeholder(_tf.float32, [None, 28, 28, 1])
    self.one_hot_labels = _tf.placeholder(_tf.int32,
                                          [None, self.num_classes])

    # Weights
    weight_shapes = [
        ('drawing_conv0_weight', [3, 3, 1, 16]),
        ('drawing_conv1_weight', [3, 3, 16, 32]),
        ('drawing_conv2_weight', [3, 3, 32, 64]),
        ('drawing_dense0_weight', [576, 128]),
        ('drawing_dense1_weight', [128, self.num_classes]),
    ]
    weights = {
        name: _tf.Variable(_tf.zeros(shape), name=name)
        for name, shape in weight_shapes
    }

    # Biases
    bias_shapes = [
        ('drawing_conv0_bias', [16]),
        ('drawing_conv1_bias', [32]),
        ('drawing_conv2_bias', [64]),
        ('drawing_dense0_bias', [128]),
        ('drawing_dense1_bias', [self.num_classes]),
    ]
    biases = {
        name: _tf.Variable(_tf.zeros(shape), name=name)
        for name, shape in bias_shapes
    }

    # Three identical stages: convolution, bias, ReLU and 2x2 max pooling
    features = self.input
    for stage in range(3):
        features = _tf.nn.conv2d(features,
                                 weights["drawing_conv%d_weight" % stage],
                                 strides=1,
                                 padding='SAME')
        features = _tf.nn.bias_add(features,
                                   biases["drawing_conv%d_bias" % stage])
        features = _tf.nn.relu(features)
        features = _tf.nn.max_pool2d(features,
                                     ksize=[1, 2, 2, 1],
                                     strides=[1, 2, 2, 1],
                                     padding='VALID')

    # Flatten the data to a 1-D vector for the fully connected layer
    fc1 = _tf.reshape(features, (-1, 576))
    fc1 = _tf.nn.xw_plus_b(fc1,
                           weights=weights["drawing_dense0_weight"],
                           biases=biases["drawing_dense0_bias"])
    fc1 = _tf.nn.relu(fc1)

    out = _tf.nn.xw_plus_b(fc1,
                           weights=weights["drawing_dense1_weight"],
                           biases=biases["drawing_dense1_bias"])
    self.predictions = _tf.nn.softmax(out)

    # Loss
    self.cost = _tf.losses.softmax_cross_entropy(
        logits=out,
        onehot_labels=self.one_hot_labels,
        reduction=_tf.losses.Reduction.NONE)

    # Optimizer
    self.optimizer = _tf.train.AdamOptimizer(learning_rate=0.001).minimize(
        self.cost)

    # Predictions (the comparison is added to the graph but not stored)
    correct_prediction = _tf.equal(_tf.argmax(self.predictions, 1),
                                   _tf.argmax(self.one_hot_labels, 1))

    self.sess = _tf.Session()
    self.sess.run(_tf.global_variables_initializer())

    # Assign the initialised weights from C++ to tensorflow
    layers = [
        'drawing_conv0_weight', 'drawing_conv0_bias',
        'drawing_conv1_weight', 'drawing_conv1_bias',
        'drawing_conv2_weight', 'drawing_conv2_bias',
        'drawing_dense0_weight', 'drawing_dense0_bias',
        'drawing_dense1_weight', 'drawing_dense1_bias',
    ]
    for key in layers:
        if 'bias' in key:
            value = net_params[key]
        elif 'drawing_dense0_weight' in key:
            # To make output of CoreML pool3 (NCHW) compatible with TF
            # (NHWC). Decompose FC weights to NCHW. Transpose to NHWC.
            # Reshape back to FC.
            coreml_128_576 = net_params[key]
            coreml_128_576 = _np.reshape(coreml_128_576, (128, 64, 3, 3))
            coreml_128_576 = _np.transpose(coreml_128_576, (0, 2, 3, 1))
            coreml_128_576 = _np.reshape(coreml_128_576, (128, 576))
            value = _np.transpose(coreml_128_576, (1, 0))
        elif 'dense' in key:
            value = _utils.convert_dense_coreml_to_tf(net_params[key])
        else:
            # TODO: Call _utils.convert_conv2d_coreml_to_tf when #2513 is merged
            value = _np.transpose(net_params[key], (2, 3, 1, 0))

        variable = _tf.get_default_graph().get_tensor_by_name(key + ":0")
        self.sess.run(_tf.assign(variable, value))
def __init__(
    self,
    net_params,
    batch_size,
    num_features,
    num_classes,
    prediction_window,
    seq_len,
    seed,
):
    """
    Build the Keras activity-classifier model and load its initial weights.

    The network is Conv1D -> Dropout -> LSTM -> Dense -> BatchNormalization
    -> ReLU -> Dropout -> Dense -> Softmax. It is compiled with categorical
    cross-entropy, Adam (learning rate 1e-3) and temporal sample weights.

    Parameters
    ----------
    net_params: dict
        Maps parameter names (e.g. "conv_weight", "lstm_i2h_i_weight",
        "bn_gamma") to shared float arrays. Every value is replaced in
        place by its numpy conversion, so the caller's dict is mutated.
    batch_size: int
        Stored as ``self.batch_size``; not otherwise used here.
    num_features: int
        Size of the last axis of the model input.
    num_classes: int
        Number of units of the final dense layer.
    prediction_window: int
        Kernel size and stride of the convolution.
    seq_len: int
        The model input has ``prediction_window * seq_len`` time steps.
    seed: int
        Seed handed to both dropout layers.
    """
    _utils.suppress_tensorflow_warnings()

    self.num_classes = num_classes
    self.batch_size = batch_size

    tf = _lazy_import_tensorflow()
    keras = tf.keras

    #############################################
    # Define the Neural Network
    #############################################
    inputs = keras.Input(shape=(prediction_window * seq_len, num_features))

    # First dense layer, implemented as a strided 1-D convolution
    conv = keras.layers.Conv1D(
        filters=CONV_H,
        kernel_size=prediction_window,
        padding="same",
        strides=prediction_window,
        use_bias=True,
        activation="relu",
    )
    cur_outputs = conv(inputs)

    # First dropout layer
    conv_dropout = keras.layers.Dropout(rate=0.2, seed=seed)
    cur_outputs = conv_dropout(cur_outputs)

    # LSTM layer
    lstm = keras.layers.LSTM(
        units=LSTM_H,
        return_sequences=True,
        use_bias=True,
    )
    cur_outputs = lstm(cur_outputs)

    # Second dense layer
    dense_hidden = keras.layers.Dense(DENSE_H)
    cur_outputs = dense_hidden(cur_outputs)

    # Batch norm layer
    batch_norm = keras.layers.BatchNormalization()
    cur_outputs = batch_norm(cur_outputs)

    # ReLU layer
    relu = keras.layers.ReLU()
    cur_outputs = relu(cur_outputs)

    # Final dropout layer
    dense_dropout = keras.layers.Dropout(rate=0.5, seed=seed)
    cur_outputs = dense_dropout(cur_outputs)

    # Final dense layer
    dense_out = keras.layers.Dense(num_classes, use_bias=False)
    cur_outputs = dense_out(cur_outputs)

    # Softmax layer
    softmax = keras.layers.Softmax()
    cur_outputs = softmax(cur_outputs)

    self.model = keras.Model(inputs=inputs, outputs=cur_outputs)
    self.model.compile(
        loss=tf.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        sample_weight_mode="temporal",
    )

    #############################################
    # Load the Weights of the Neural Network
    #############################################
    # Only existing keys are reassigned, so mutating while iterating is safe.
    for key in net_params.keys():
        net_params[key] = _utils.convert_shared_float_array_to_numpy(
            net_params[key])

    # The layer objects created above are the same instances the model
    # holds, so weights are set on them directly rather than through
    # positional indices into self.model.layers.

    # Set weight for first dense layer
    conv.set_weights([
        _utils.convert_conv1d_coreml_to_tf(net_params["conv_weight"]),
        net_params["conv_bias"],
    ])

    # Set LSTM weights: gather the per-gate arrays in i, f, c, o order,
    # concatenate along axis 0, then swap the two axes of the weight
    # matrices. Only the "h2h" bias entries are read.
    gates = ("i", "f", "c", "o")
    i2h = _np.concatenate(
        [net_params["lstm_i2h_%s_weight" % gate] for gate in gates], axis=0)
    h2h = _np.concatenate(
        [net_params["lstm_h2h_%s_weight" % gate] for gate in gates], axis=0)
    bias = _np.concatenate(
        [net_params["lstm_h2h_%s_bias" % gate] for gate in gates], axis=0)
    i2h = _np.swapaxes(i2h, 1, 0)
    h2h = _np.swapaxes(h2h, 1, 0)
    lstm.set_weights([i2h, h2h, bias])

    # Set weight for second dense layer
    dense_hidden.set_weights([
        net_params["dense0_weight"].reshape(DENSE_H, LSTM_H).swapaxes(0, 1),
        net_params["dense0_bias"],
    ])

    # Set batch Norm weights
    batch_norm.set_weights([
        net_params["bn_gamma"],
        net_params["bn_beta"],
        net_params["bn_running_mean"],
        net_params["bn_running_var"],
    ])

    # Set weights for last dense layer (it has no bias)
    dense_out.set_weights([
        net_params["dense1_weight"].reshape(
            (self.num_classes, DENSE_H)).swapaxes(0, 1),
    ])
def __init__(self, net_params, batch_size, num_features, num_classes,
             prediction_window, seq_len):
    """
    Build the TensorFlow graph of the activity classifier and start a session.

    The graph is conv1d -> dropout -> LSTM -> dense -> batch norm -> ReLU
    -> dropout -> dense -> softmax, with a weighted cross-entropy loss.

    Parameters
    ----------
    net_params: dict
        Maps parameter names to shared float arrays. Each value is
        replaced in place by its numpy conversion, and the dict is then
        handed to ``load_lstm_weights_params`` and ``load_weights``.
    batch_size: int
        Batch size used to reshape the weight/target placeholders and to
        create the initial LSTM state.
    num_features: int
        Size of the last axis of the data placeholder.
    num_classes: int
        Depth of the one-hot targets and width of the output layer.
    prediction_window: int
        Kernel width and stride of the convolution.
    seq_len: int
        Number of steps in the weight/target placeholders; the data
        placeholder has ``prediction_window * seq_len`` steps.
    """
    self.gpu_policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy.start()

    # Swap every shared float array for its numpy conversion (in place).
    for param_name, shared_array in list(net_params.items()):
        net_params[param_name] = _utils.convert_shared_float_array_to_numpy(
            shared_array)

    _tf.reset_default_graph()

    self.num_classes = num_classes
    self.batch_size = batch_size
    self.seq_len = seq_len

    # Graph inputs
    num_steps = prediction_window * seq_len
    self.data = _tf.placeholder(_tf.float32,
                                [None, num_steps, num_features])
    self.weight = _tf.placeholder(_tf.float32, [None, seq_len, 1])
    self.target = _tf.placeholder(_tf.int32, [None, seq_len, 1])
    self.is_training = _tf.placeholder(_tf.bool)

    # Drop the trailing unit axis of the sample weights and targets, then
    # one-hot encode the targets.
    per_step_shape = [self.batch_size, seq_len]
    per_step_weight = _tf.reshape(self.weight, per_step_shape)
    per_step_target = _tf.reshape(self.target, per_step_shape)
    one_hot_target = _tf.one_hot(per_step_target,
                                 depth=self.num_classes,
                                 axis=-1)

    def zero_variable(var_name, shape):
        # Zero-filled, explicitly named variable.
        return _tf.Variable(_tf.zeros(shape), name=var_name)

    # Trainable weights
    weight_shapes = [
        ('conv_weight', [prediction_window, num_features, CONV_H]),
        ('dense0_weight', [LSTM_H, DENSE_H]),
        ('dense1_weight', [DENSE_H, self.num_classes]),
    ]
    self.weights = {
        var_name: zero_variable(var_name, shape)
        for var_name, shape in weight_shapes
    }

    # Trainable biases
    bias_shapes = [
        ('conv_bias', [CONV_H]),
        ('dense0_bias', [DENSE_H]),
        ('dense1_bias', [num_classes]),
    ]
    self.biases = {
        var_name: zero_variable(var_name, shape)
        for var_name, shape in bias_shapes
    }

    # Convolution + bias + ReLU, followed by dropout
    conv_out = _tf.nn.conv1d(self.data,
                             self.weights['conv_weight'],
                             stride=prediction_window,
                             padding='SAME')
    conv_out = _tf.nn.relu(
        _tf.nn.bias_add(conv_out, self.biases['conv_bias']))
    lstm_input = _tf.layers.dropout(conv_out,
                                    rate=0.2,
                                    training=self.is_training)

    # Long Short-Term Memory. The cell is initialised from whatever
    # load_lstm_weights_params returns for net_params.
    lstm_init = self.load_lstm_weights_params(net_params)
    cell = _tf.nn.rnn_cell.LSTMCell(
        num_units=LSTM_H,
        reuse=_tf.AUTO_REUSE,
        forget_bias=0.0,
        initializer=_tf.initializers.constant(lstm_init, verify_shape=True))
    zero_state = cell.zero_state(batch_size, _tf.float32)
    rnn_outputs, _ = _tf.nn.dynamic_rnn(cell,
                                        lstm_input,
                                        initial_state=zero_state)

    def constant_init(param_name):
        # Initializer that reproduces the given net_params entry.
        return _tf.initializers.constant(net_params[param_name],
                                         verify_shape=True)

    # Dense + batch norm + ReLU + dropout over the flattened LSTM outputs
    hidden = _tf.reshape(rnn_outputs, (-1, LSTM_H))
    hidden = _tf.add(_tf.matmul(hidden, self.weights['dense0_weight']),
                     self.biases['dense0_bias'])
    hidden = _tf.layers.batch_normalization(
        inputs=hidden,
        beta_initializer=constant_init('bn_beta'),
        gamma_initializer=constant_init('bn_gamma'),
        moving_mean_initializer=constant_init('bn_running_mean'),
        moving_variance_initializer=constant_init('bn_running_var'),
        training=self.is_training)
    hidden = _tf.nn.relu(hidden)
    hidden = _tf.layers.dropout(hidden,
                                rate=0.5,
                                training=self.is_training)

    # Output logits and class probabilities
    logits = _tf.add(_tf.matmul(hidden, self.weights['dense1_weight']),
                     self.biases['dense1_bias'])
    logits = _tf.reshape(logits, (-1, self.seq_len, self.num_classes))
    self.probs = _tf.nn.softmax(logits)

    # Total weight per sequence, and the number of sequences whose total
    # weight is positive.
    weight_per_seq = _tf.reduce_sum(per_step_weight, axis=1)
    num_weighted_seqs = _tf.reduce_sum(
        _tf.cast(_tf.greater(weight_per_seq, 0), dtype=_tf.float32))

    # Loss: weighted cross-entropy per step, normalised per sequence and
    # then over the positively weighted sequences (1e-5 avoids 0-division).
    step_loss = _tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_target,
        weights=per_step_weight,
        reduction=_tf.losses.Reduction.NONE)
    self.loss_per_seq = _tf.reduce_sum(step_loss,
                                       axis=1) / (weight_per_seq + 1e-5)
    self.loss_op = _tf.reduce_sum(
        self.loss_per_seq) / (num_weighted_seqs + 1e-5)

    # Optimizer. self.optimizer is expected to be created by
    # set_learning_rate (defined elsewhere). The UPDATE_OPS collected
    # here are grouped with the minimize op so they run on each step.
    pending_updates = _tf.get_collection(_tf.GraphKeys.UPDATE_OPS)
    self.set_learning_rate(1e-3)
    minimize_op = self.optimizer.minimize(self.loss_op)
    self.train_op = _tf.group([minimize_op, pending_updates])

    # Session
    self.sess = _tf.Session()

    # Initialize all variables, then load the provided parameters
    self.sess.run(_tf.global_variables_initializer())
    self.sess.run(_tf.local_variables_initializer())
    self.load_weights(net_params)
def loss_layer(self, predict, labels):
    """
    Build the object-detection loss for one batch.

    Parameters
    ----------
    predict: TensorFlow Tensor
        Raw network output for the batch. It is reshaped here to
        [-1, grid_h, grid_w, num_anchors, 5 + num_classes].
    labels: TensorFlow Tensor
        Ground truth for the batch. Its last axis is read as
        x/y (0:2), w/h (2:4), confidence (4) and class entries (5:).

    Returns
    -------
    loss: TensorFlow Tensor
        Sum of the xy, wh, confidence and classification loss terms.
    """

    def config_value(option):
        # Read one option from the config and convert it to numpy.
        return _utils.convert_shared_float_array_to_numpy(
            self.config.get(option))

    rescore = int(config_value('od_rescore'))
    lmb_coord_xy = config_value('lmb_coord_xy')
    lmb_coord_wh = config_value('lmb_coord_wh')
    lmb_obj = config_value('lmb_obj')
    lmb_noobj = config_value('lmb_noobj')
    lmb_class = config_value('lmb_class')

    # Split the network output into its per-anchor components
    pred_shape = [-1] + list(self.grid_shape) + [
        self.num_anchors, 5 + self.num_classes
    ]
    ypred = _tf.reshape(predict, pred_shape)
    raw_xy = ypred[..., 0:2]
    raw_wh = ypred[..., 2:4]
    raw_conf = ypred[..., 4]
    class_scores = ypred[..., 5:]

    tf_anchors = _tf.constant(self.anchors)

    # Split the ground truth the same way
    gt_xy = labels[..., 0:2]
    gt_wh = labels[..., 2:4]
    gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5)
    gt_conf = labels[..., 4]
    gt_conf_first_anchor = labels[..., 0:1, 4]
    gt_class = labels[..., 5:]

    # Decode the predicted boxes
    xy = _tf.sigmoid(raw_xy)
    wh = _tf.exp(raw_wh) * tf_anchors
    # exp(0) * anchors: anchor-sized boxes in the same shape as wh
    anchor_wh = _tf.exp(raw_wh * 0.0) * tf_anchors

    # Corners and area of predicted and ground-truth boxes
    lo = xy - wh / 2
    hi = xy + wh / 2
    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    gt_lo = gt_xy - gt_wh / 2
    gt_hi = gt_xy + gt_wh / 2

    # IoU between the anchor-sized box and the ground truth, sizes only
    anchor_inter = _tf.maximum(
        2 * _tf.minimum(anchor_wh / 2, gt_wh / 2), 0)
    anchor_area = anchor_wh[..., 0] * anchor_wh[..., 1]
    anchor_inter_area = anchor_inter[..., 0] * anchor_inter[..., 1]
    anchor_iou = anchor_inter_area / (anchor_area + gt_area -
                                      anchor_inter_area)

    # IoU between the predicted box and the ground truth
    inter = _tf.maximum(_tf.minimum(hi, gt_hi) - _tf.maximum(lo, gt_lo), 0)
    area = wh[..., 0] * wh[..., 1]
    inter_area = inter[..., 0] * inter[..., 1]
    iou = inter_area / (area + gt_area - inter_area)

    # Mark the entries whose anchor IoU equals the maximum along axis 3
    best_iou = _tf.reduce_max(anchor_iou, 3, keepdims=True)
    resp_box = _tf.cast(_tf.equal(anchor_iou, best_iou), dtype=_tf.float32)

    count = _tf.reduce_sum(gt_conf_first_anchor)

    # Object / no-object masks; no gradient flows through them
    kr_obj_ij = _tf.stop_gradient(resp_box * gt_conf)
    kr_noobj_ij = 1 - kr_obj_ij

    conf_scale = 1 / (self.batch_size * self.grid_shape[0] *
                      self.grid_shape[1])
    kr_box = _tf.expand_dims(kr_obj_ij, -1)

    # With rescoring the confidence target is the (gradient-free) IoU
    obj_gt_conf = (kr_obj_ij * _tf.stop_gradient(iou)
                   if rescore else kr_obj_ij)

    obj_w = kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj

    # Localization losses
    loss_xy = lmb_coord_xy * _tf.reduce_sum(
        kr_box * _tf.square(gt_xy - xy)) / (count + 0.01)
    loss_wh = _tf.losses.huber_loss(labels=gt_raw_wh,
                                    predictions=raw_wh,
                                    weights=lmb_coord_wh * kr_box,
                                    delta=1.0)

    # Confidence loss
    conf_xent = _tf.nn.sigmoid_cross_entropy_with_logits(
        labels=obj_gt_conf, logits=raw_conf)
    loss_conf = conf_scale * _tf.reduce_sum(obj_w * conf_xent)

    # Classification loss
    class_xent = _tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=gt_class, logits=class_scores)
    loss_cls = lmb_class * _tf.reduce_sum(
        kr_obj_ij * class_xent) / (count + 0.01)

    return _tf.add_n([loss_xy, loss_wh, loss_conf, loss_cls])
def get_augmented_images(images, output_shape):
    """
    Randomly rescale, pad/crop, flip and colour-jitter a batch of images.

    Parameters
    ----------
    images: sequence
        Input images. Each one is converted with
        ``_utils.convert_shared_float_array_to_numpy`` before use. The
        code unstacks exactly three shape entries and crops 3 channels,
        so each image is presumably height x width x 3 (confirm against
        the caller).
    output_shape: sequence
        ``(height, width)`` of every augmented image.

    Returns
    -------
    augmented_images: list of TensorFlow Tensor
        One augmented image per input, clipped to the range [0, 1].
    transformations: list of TensorFlow Tensor
        One 3x3 matrix per input, built from the scale factors, the
        pad/crop offsets and the flip applied to that image.
    """
    # Store transformations and augmented_images for the input batch
    transformations = []
    augmented_images = []

    # Augmentation options
    min_scale = 1 / 1.5
    max_scale = 1.5
    max_aspect_ratio = 1.5
    max_hue = 0.05
    max_brightness = 0.05
    max_saturation = 1.25
    max_contrast = 1.25
    horizontal_flip = True

    for image in images:
        image = _utils.convert_shared_float_array_to_numpy(image)

        # Random rescale; the aspect ratio is jittered log-uniformly
        height, width, _ = tf.unstack(tf.shape(image))
        scale_h = tf.random_uniform([], minval=min_scale, maxval=max_scale)
        scale_w = scale_h * tf.exp(
            tf.random_uniform([],
                              minval=-np.log(max_aspect_ratio),
                              maxval=np.log(max_aspect_ratio)))
        new_height = tf.to_int32(tf.to_float(height) * scale_h)
        new_width = tf.to_int32(tf.to_float(width) * scale_w)

        image_scaled = tf.squeeze(
            tf.image.resize_bilinear(tf.expand_dims(image, 0),
                                     [new_height, new_width]), [0])

        # Image padding
        pad_image, pad_offset = pad_to_ensure_size(image_scaled,
                                                   output_shape[0],
                                                   output_shape[1])

        # Random crop of the requested output size
        new_height = tf.maximum(output_shape[0], new_height)
        new_width = tf.maximum(output_shape[1], new_width)
        slice_offset = (
            tf.random_uniform([],
                              minval=0,
                              maxval=new_height - output_shape[0] + 1,
                              dtype=tf.int32),
            tf.random_uniform([],
                              minval=0,
                              maxval=new_width - output_shape[1] + 1,
                              dtype=tf.int32),
        )
        augmented_image = array_ops.slice(
            pad_image, [slice_offset[0], slice_offset[1], 0],
            [output_shape[0], output_shape[1], 3])

        if horizontal_flip:
            uniform_random = random_ops.random_uniform([], 0, 1.0)
            did_horiz_flip = math_ops.less(uniform_random, .5)
            # Bind the un-flipped tensor to its own name so both branches
            # refer to the same input.
            unflipped = augmented_image
            augmented_image = control_flow_ops.cond(
                did_horiz_flip,
                lambda: array_ops.reverse(unflipped, [1]),
                lambda: unflipped)
            # +1 when not flipped, -1 when flipped
            flip_sign = 1 - tf.to_float(did_horiz_flip) * 2
        else:
            flip_sign = 1
            did_horiz_flip = tf.constant(False)

        ty = tf.to_float(pad_offset[0] - slice_offset[0])
        tx = flip_sign * tf.to_float(
            pad_offset[1] - slice_offset[1]
        ) + tf.to_float(did_horiz_flip) * output_shape[1]

        # Make the transformation matrix
        transformation = tf.reshape(
            tf.stack([
                scale_h, 0.0, ty,
                0.0, flip_sign * scale_w, tx,
                0.0, 0.0, 1.0,
            ]), (3, 3))

        # Colour augmentations. Each step feeds the next one; the result
        # must stay in augmented_image, otherwise it is discarded before
        # the final clip.
        if max_hue is not None and max_hue > 0:
            augmented_image = tf.image.random_hue(augmented_image,
                                                  max_delta=max_hue)

        if max_brightness is not None and max_brightness > 0:
            augmented_image = tf.image.random_brightness(
                augmented_image, max_delta=max_brightness)

        if max_saturation is not None and max_saturation > 1.0:
            log_sat = np.log(max_saturation)
            augmented_image = tf.image.random_saturation(
                augmented_image,
                lower=np.exp(-log_sat),
                upper=np.exp(log_sat))

        if max_contrast is not None and max_contrast > 1.0:
            log_con = np.log(max_contrast)
            augmented_image = tf.image.random_contrast(
                augmented_image,
                lower=np.exp(-log_con),
                upper=np.exp(log_con))

        augmented_image = tf.clip_by_value(augmented_image, 0, 1)

        augmented_images.append(augmented_image)
        transformations.append(transformation)

    return augmented_images, transformations