def yolo_head(feats, anchors, num_classes):
    # Convert anchors to shape (1, 1, 1, num_anchors, 2).
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # conv_dims: width and height of the grid.
    _, conv_height, conv_width, _ = K.int_shape(feats)
    conv_dims = K.variable([conv_width, conv_height])

    # Reshape the YOLO network output to
    # (None, grid_width, grid_height, num_anchors, num_classes + 5).
    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])

    # Cast conv_dims to the dtype of feats and reshape it to (1, 1, 1, 1, 2).
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Create the grid of cell indices from (0, 0) to (width, height).
    conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    conv_index = conv_index[:, [1, 0]]  # Swap columns for YOLO ordering.
    conv_index = K.variable(
        conv_index.reshape(1, conv_height, conv_width, 1, 2))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Offset box centers by their grid-cell indices and normalize by grid
    # size; scale widths/heights by the anchors.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
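# Illustration only (not part of the original function): how the static
# conv_index grid above is built. The names below mirror the function's
# locals. For a square 2 x 2 grid, each cell ends up holding its own
# (x, y) coordinate, which the (box_xy + conv_index) offset relies on.
import numpy as np

conv_width = conv_height = 2
conv_index = np.array(list(np.ndindex(conv_width, conv_height)))
conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering
print(conv_index.reshape(conv_height, conv_width, 2))
# [[[0 0] [1 0]]
#  [[0 1] [1 1]]]  -> cell (row, col) stores (x=col, y=row)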
def spectral_clustering(x, scale, n_nbrs=None, affinity='full', W=None):
    '''
    Computes the eigendecomposition of the graph Laplacian of x, using the
    full Gaussian affinity matrix (full), the symmetrized Gaussian affinity
    matrix with k nonzero affinities for each point (knn), or the Siamese
    affinity matrix (siamese)

    x:          input data
    n_nbrs:     number of neighbors used
    affinity:   the aforementioned affinity mode

    returns:    the eigenvalues and eigenvectors of the graph Laplacian
    '''
    if affinity == 'full':
        W = K.eval(cf.full_affinity(K.variable(x), scale))
    elif affinity == 'knn':
        if n_nbrs is None:
            raise ValueError('n_nbrs must be provided if affinity = knn!')
        W = K.eval(cf.knn_affinity(K.variable(x), scale, n_nbrs))
    elif affinity == 'siamese':
        if W is None:
            raise ValueError('W must be provided if affinity = siamese!')

    d = np.sum(W, axis=1)
    D = np.diag(d)
    # (unnormalized) graph Laplacian for spectral clustering
    L = D - W
    Lambda, V = np.linalg.eigh(L)
    return (Lambda, V)
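# Minimal usage sketch (illustrative): pass a precomputed affinity matrix
# through the 'siamese' path, so the cf.full_affinity / cf.knn_affinity
# helpers assumed by the other modes are not needed.
import numpy as np

x = np.random.randn(10, 2)
sq_dists = np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
W = np.exp(-sq_dists / (2 * 1.0 ** 2))  # full Gaussian affinity, scale = 1.0

Lambda, V = spectral_clustering(x, scale=1.0, affinity='siamese', W=W)
print(Lambda[:3])  # eigenvalues ascend; the first is ~0 for a connected graph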
def get_GRU_components(inputs, states, weight):
    units = weight[1].shape[0]  # recurrent kernel is (units, units * 3)

    kernel = K.variable(weight[0])  # shape = (input_dim, self.units * 3)
    recurrent_kernel = K.variable(weight[1])  # shape = (self.units, self.units * 3)
    bias = K.variable(weight[2])  # bias shape = (3 * self.units,)
    inputs = K.variable(inputs)
    h_tm1 = K.variable(states)  # Previous memory state.

    # Update gate.
    kernel_z = kernel[:, :units]
    recurrent_kernel_z = recurrent_kernel[:, :units]
    input_bias_z = bias[:units]
    # Reset gate.
    kernel_r = kernel[:, units:units * 2]
    recurrent_kernel_r = recurrent_kernel[:, units:units * 2]
    input_bias_r = bias[units:units * 2]
    # New gate.
    kernel_h = kernel[:, units * 2:]
    recurrent_kernel_h = recurrent_kernel[:, units * 2:]
    input_bias_h = bias[units * 2:]

    x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
    x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
    x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)

    recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
    recurrent_r = K.dot(h_tm1, recurrent_kernel_r)

    z = hard_sigmoid(x_z + recurrent_z)  # Recurrent activation = 'hard_sigmoid'.
    r = hard_sigmoid(x_r + recurrent_r)

    recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)

    # Recompute recurrent_h from its per-unit split to verify the r gate:
    # sum_j r_j * h_j * W_h[j, :] should equal dot(r * h_tm1, W_h).
    split_recurrent_h = K.expand_dims(h_tm1, axis=-1) * recurrent_kernel_h
    r_unsqueeze = K.expand_dims(r, axis=-1)
    recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)

    delta = np.mean(
        np.abs(K.get_value(recompute_recurrent_h) - K.get_value(recurrent_h)))
    print("delta =", delta, np.mean(K.get_value(recompute_recurrent_h)),
          np.mean(K.get_value(recurrent_h)))
    assert delta < 1e-6, "r gate is wrong."

    hh = tanh(x_h + recurrent_h)  # Activation = 'tanh'.

    # Previous and candidate state mixed by update gate.
    h = z * h_tm1 + (1 - z) * hh
    return (K.get_value(h_tm1), K.get_value(h), K.get_value(z), K.get_value(r),
            K.get_value(hh), K.get_value(x_h), K.get_value(split_recurrent_h))
def do_2d_convolution(
        feature_matrix, kernel_matrix, pad_edges=False, stride_length_px=1):
    """Convolves 2-D feature maps with 2-D kernel.

    M = number of rows in each input feature map
    N = number of columns in each input feature map
    C = number of input feature maps (channels)
    m = number of rows in kernel
    n = number of columns in kernel
    c = number of output feature maps (channels)

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must
        be M x N x C or 1 x M x N x C.
    :param kernel_matrix: Kernel as numpy array.  Dimensions must be
        m x n x C x c.
    :param pad_edges: Boolean flag.  If True, edges of input feature maps will
        be zero-padded during convolution, so spatial dimensions of the output
        feature maps will be the same (M x N).  If False, dimensions of the
        output maps will be (M - m + 1) x (N - n + 1).
    :param stride_length_px: Stride length (pixels).  The kernel will move by
        this many rows or columns at a time as it slides over each input
        feature map.
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions
        will be 1 x M x N x c or 1 x (M - m + 1) x (N - n + 1) x c, depending
        on whether or not edges are padded.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_numpy_array_without_nan(kernel_matrix)
    error_checking.assert_is_numpy_array(kernel_matrix, num_dimensions=4)
    error_checking.assert_is_boolean(pad_edges)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 1)

    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)

    if pad_edges:
        padding_string = 'same'
    else:
        padding_string = 'valid'

    feature_tensor = K.conv2d(
        x=K.variable(feature_matrix), kernel=K.variable(kernel_matrix),
        strides=(stride_length_px, stride_length_px), padding=padding_string,
        data_format='channels_last')

    return feature_tensor.numpy()
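# Hypothetical usage sketch (assumes this module and its error_checking
# dependency are importable): a 5 x 5 single-channel map convolved with a
# 3 x 3 one-in/one-out-channel box filter, no padding, stride 1.
import numpy

feature_matrix = numpy.random.rand(5, 5, 1)
kernel_matrix = numpy.ones((3, 3, 1, 1)) / 9  # 3 x 3 mean filter
output_matrix = do_2d_convolution(
    feature_matrix, kernel_matrix, pad_edges=False, stride_length_px=1)
print(output_matrix.shape)  # (1, 3, 3, 1), i.e. (M - m + 1) x (N - n + 1)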
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes."""
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        boxes, box_confidence, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.compat.v1.variables_initializer([max_boxes_tensor]))

    nms_index = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)

    return boxes, scores, classes
def do_3d_pooling(feature_matrix, stride_length_px=2,
                  pooling_type_string=MAX_POOLING_TYPE_STRING):
    """Pools 3-D feature maps.

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must
        be M x N x H x C or 1 x M x N x H x C.
    :param stride_length_px: See doc for `do_2d_pooling`.
    :param pooling_type_string: Pooling type (must be accepted by
        `_check_pooling_type`).
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions
        will be 1 x m x n x h x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 2)
    _check_pooling_type(pooling_type_string)

    if len(feature_matrix.shape) == 4:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=5)

    feature_tensor = K.pool3d(
        x=K.variable(feature_matrix), pool_mode=pooling_type_string,
        pool_size=(stride_length_px, stride_length_px, stride_length_px),
        strides=(stride_length_px, stride_length_px, stride_length_px),
        padding='valid', data_format='channels_last')

    return feature_tensor.numpy()
def __center_separate_loss(centers):
    # Pairwise Euclidean distances between all distinct class centers.
    distances = []
    for label_i in range(CONFIG["num_classes"]):
        for label_j in range(CONFIG["num_classes"]):
            if label_i < label_j:
                center_i = centers[label_i]
                center_j = centers[label_j]
                distance = __euclidean(center_i, center_j)
                distances.append(distance)

    # Penalize centers whose pairwise distance deviates from 2.5.
    loss = K.variable(0.0)
    for distance in distances:
        distance = tf.pow(tf.subtract(distance, 2.5), 2)
        loss = tf.add(loss, distance)
    return loss
def Orthonorm(x, name=None):
    '''
    Builds a Keras layer that handles orthogonalization of x

    x:      an n x d input matrix
    name:   name of the Keras layer

    returns:    a Keras layer instance. During evaluation, the instance
                returns an n x d orthogonal matrix if x is full rank and
                not singular
    '''
    # Get dimensionality of x.
    d = x.get_shape().as_list()[-1]
    # Compute orthogonalizing matrix.
    ortho_weights = orthonorm_op(x)
    # Create variable that holds this matrix.
    ortho_weights_store = K.variable(np.zeros((d, d)))
    # Create op that saves matrix into variable.
    ortho_weights_update = tf.assign(ortho_weights_store, ortho_weights,
                                     name='ortho_weights_update')
    # Switch between stored and calculated weights based on training phase.
    l = Lambda(lambda x: K.in_train_phase(K.dot(x, ortho_weights),
                                          K.dot(x, ortho_weights_store)),
               name=name)
    l.add_update(ortho_weights_update)
    return l
def do_3d_convolution(
        feature_matrix, kernel_matrix, pad_edges=False, stride_length_px=1):
    """Convolves 3-D feature maps with 3-D kernel.

    m = number of rows in kernel
    n = number of columns in kernel
    h = number of height levels in kernel
    c = number of output feature maps (channels)

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must
        be M x N x H x C or 1 x M x N x H x C.
    :param kernel_matrix: Kernel as numpy array.  Dimensions must be
        m x n x h x C x c.
    :param pad_edges: See doc for `do_2d_convolution`.
    :param stride_length_px: See doc for `do_2d_convolution`.
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions
        will be 1 x M x N x H x c or
        1 x (M - m + 1) x (N - n + 1) x (H - h + 1) x c, depending on whether
        or not edges are padded.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_numpy_array_without_nan(kernel_matrix)
    error_checking.assert_is_numpy_array(kernel_matrix, num_dimensions=5)
    error_checking.assert_is_boolean(pad_edges)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 1)

    if len(feature_matrix.shape) == 4:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=5)

    if pad_edges:
        padding_string = 'same'
    else:
        padding_string = 'valid'

    feature_tensor = K.conv3d(
        x=K.variable(feature_matrix), kernel=K.variable(kernel_matrix),
        strides=(stride_length_px, stride_length_px, stride_length_px),
        padding=padding_string, data_format='channels_last')

    return K.eval(feature_tensor)
def full_affinity(input_x, scale):
    """Calculates the symmetrized full Gaussian affinity matrix, scaled by a
    provided scale.

    Args:
        input_x: input dataset of size n x d
        scale: provided scale

    Returns:
        n x n affinity matrix
    """
    sigma = K.variable(scale)
    dist_x = squared_distance(input_x)
    sigma_squared = K.expand_dims(K.pow(sigma, 2), -1)
    weight_mat = K.exp(-dist_x / (2 * sigma_squared))
    return weight_mat
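# Illustrative NumPy equivalent of full_affinity (the Keras version above
# relies on a squared_distance helper defined elsewhere in the module):
# W_ij = exp(-||x_i - x_j||^2 / (2 * scale^2)).
import numpy as np

def full_affinity_np(x, scale):
    sq_dists = np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
    return np.exp(-sq_dists / (2 * scale ** 2))

x = np.random.randn(5, 3)
W = full_affinity_np(x, scale=1.0)
assert np.allclose(W, W.T) and np.allclose(np.diag(W), 1.0)  # symmetric, unit diagonal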
def __center_loss(y_true, y_pred, centers):
    y_true_value = K.argmax(y_true)
    loss = K.variable(0.0)
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        # Select the predictions belonging to this class.
        indices = tf.where(tf.equal(y_true_value, label))
        pred_per_class = tf.gather_nd(y_pred, indices=indices)
        # Sum of squared distances to the class center.
        diff = tf.subtract(pred_per_class, center)
        square_diff = K.pow(diff, 2)
        class_sum = K.sum(K.sum(square_diff, axis=-1), axis=-1)
        loss = tf.add(loss, class_sum)
    return loss
def gru_with_z_gate(x, weight):
    h_tm1, inputs, r, hh = x[0], x[1], x[2], x[3]
    weight = K.variable(weight)
    units = h_tm1.shape[-1]

    kernel_z = weight[:units, :units]
    recurrent_kernel_z = weight[units:units * 2, :units]
    input_bias_z = weight[units * 2, :units]  # Reduce to 1 dim.

    x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
    recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
    z = hard_sigmoid(x_z + recurrent_z)

    # Full GRU state update (kept from the original, but unused here):
    h = z * h_tm1 + (1 - z) * hh
    # Only the update gate z is returned.
    return z
def weighted_categorical_crossentropy(self, weights):
    weights = K.variable(weights)

    def loss(y_true, y_pred):
        # Scale predictions so that the class probabilities of each sample
        # sum to 1.
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        # Clip to prevent NaNs and Infs.
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # Weighted cross-entropy: each class term is scaled by its weight.
        loss = y_true * K.log(y_pred) * weights
        loss = -K.sum(loss, -1)
        return loss

    return loss
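# Hypothetical usage sketch: up-weight the rare class 2 three-fold. The
# function is written as a method, so `self` is passed explicitly as None
# here purely for illustration; in practice it would live on a class and
# the returned closure would be handed to model.compile(loss=...).
import numpy as np

loss_fn = weighted_categorical_crossentropy(None, np.array([1.0, 1.0, 3.0]))
y_true = K.variable(np.array([[0.0, 0.0, 1.0]]))
y_pred = K.variable(np.array([[0.2, 0.2, 0.6]]))
print(K.eval(loss_fn(y_true, y_pred)))  # ~3 * -log(0.6) ≈ 1.53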
def inject(self, model):
    """Inject the Lookahead algorithm for the given model.

    The following code is modified from Keras's _make_train_function method.
    See: https://github.com/keras-team/keras/blob/master/keras/engine/training.py#L497
    """
    if not hasattr(model, 'train_function'):
        raise RuntimeError('You must compile your model before using it.')

    model._check_trainable_weights_consistency()

    if model.train_function is None:
        inputs = (model._feed_inputs +
                  model._feed_targets +
                  model._feed_sample_weights)
        if model._uses_dynamic_learning_phase():
            inputs += [K.learning_phase()]
        fast_params = model._collected_trainable_weights

        with K.name_scope('training'):
            with K.name_scope(model.optimizer.__class__.__name__):
                training_updates = model.optimizer.get_updates(
                    params=fast_params, loss=model.total_loss)
            # Slow weights: a copy of the fast weights, synced every k steps.
            slow_params = [K.variable(p) for p in fast_params]
            fast_updates = (model.updates +
                            training_updates +
                            model.metrics_updates)

            slow_updates, copy_updates = [], []
            for p, q in zip(fast_params, slow_params):
                # q <- q + alpha * (p - q), then p <- q.
                slow_updates.append(K.update(q, q + self.alpha * (p - q)))
                copy_updates.append(K.update(p, q))

            # Gets loss and metrics. Updates weights at each call.
            fast_train_function = K.function(
                inputs, [model.total_loss] + model.metrics_tensors,
                updates=fast_updates,
                name='fast_train_function',
                **model._function_kwargs)

            def F(inputs):
                self.count += 1
                R = fast_train_function(inputs)
                if self.count % self.k == 0:
                    # Every k fast steps, pull the slow weights toward the
                    # fast weights and copy them back.
                    K.batch_get_value(slow_updates)
                    K.batch_get_value(copy_updates)
                return R

            model.train_function = F
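# Hypothetical usage sketch: `inject` is a method of a Lookahead wrapper
# class (not shown here) that stores the sync period k, the interpolation
# factor alpha, and a step counter. After a normal compile, injecting
# replaces model.train_function so that every k fast steps the slow
# weights are pulled toward the fast weights and copied back.
#
# model.compile(optimizer='adam', loss='categorical_crossentropy')
# lookahead = Lookahead(k=5, alpha=0.5)  # assumed constructor
# lookahead.inject(model)
# model.fit(x_train, y_train, epochs=10)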
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10,
                             iou_threshold=0.5):
    # Tensor used by tf.image.non_max_suppression(); must be initialized.
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.compat.v1.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes kept by non-max suppression.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                               iou_threshold)

    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes
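# Minimal sketch (TF1-style session execution, matching the function
# above): two heavily overlapping boxes and one distant box; NMS at
# IoU 0.5 suppresses the lower-scoring overlap and keeps two boxes.
import numpy as np

boxes = K.variable(np.array([[0, 0, 10, 10],
                             [1, 1, 11, 11],
                             [50, 50, 60, 60]], dtype='float32'))
scores = K.variable(np.array([0.9, 0.8, 0.7], dtype='float32'))
classes = K.variable(np.array([0, 0, 1]), dtype='int32')

s, b, c = yolo_non_max_suppression(scores, boxes, classes, max_boxes=10)
print(K.get_session().run([s, c]))  # scores [0.9, 0.7], classes [0, 1]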
def full_affinity(X, scale):
    '''
    Calculates the symmetrized full Gaussian affinity matrix, scaled
    by a provided scale

    X:      input dataset of size n x d
    scale:  provided scale

    returns:    n x n affinity matrix
    '''
    sigma = K.variable(scale)
    Dx = squared_distance(X)
    sigma_squared = K.pow(sigma, 2)
    sigma_squared = K.expand_dims(sigma_squared, -1)
    Dx_scaled = Dx / (2 * sigma_squared)
    W = K.exp(-Dx_scaled)
    return W
def center_loss(y_true, y_pred):
    SUPPORT_SIZE = int(CONFIG["test_sampling_method"][2:])

    # Split the batch into train samples and support samples.
    indices = np.arange(CONFIG["batch_size"])
    train_indices = np.where(indices % (SUPPORT_SIZE + 1) == 0)[0]
    train_y_true = tf.gather(y_true, indices=train_indices)
    train_y_pred = tf.gather(y_pred, indices=train_indices)

    support_indices = np.arange(1, SUPPORT_SIZE + 1)
    support_y_true = tf.gather(y_true, indices=support_indices)
    support_y_pred = tf.gather(y_pred, indices=support_indices)

    centers = calc_centers_for_support_tensor(
        K.concatenate([train_y_true, support_y_true], axis=0),
        K.concatenate([train_y_pred, support_y_pred], axis=0),
    )

    loss = K.variable(0.0)
    loss = tf.add(
        loss,
        1.0 * __center_loss(
            K.concatenate([train_y_true, support_y_true], axis=0),
            K.concatenate([train_y_pred, support_y_pred], axis=0),
            centers))
    loss = tf.add(
        loss,
        0.5 * __softmax_euclidean_loss(
            K.concatenate([train_y_true, support_y_true], axis=0),
            K.concatenate([train_y_pred, support_y_pred], axis=0),
            centers))
    if CONFIG["experiment_id"] == "farther":
        loss = tf.add(loss, 1.0 * __center_separate_loss(centers))
    return loss
def gru_with_r_gate(x, weight):
    h_tm1, inputs, z, x_h, split_recurrent_h = x[0], x[1], x[2], x[3], x[4]
    weight = K.variable(weight)
    units = h_tm1.shape[-1]

    kernel_r = weight[:units, units:units * 2]
    recurrent_kernel_r = weight[units:units * 2, units:units * 2]
    input_bias_r = weight[units * 2, units:units * 2]  # Reduce to 1 dim.

    x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
    recurrent_r = K.dot(h_tm1, recurrent_kernel_r)
    r = hard_sigmoid(x_r + recurrent_r)

    # Recompute recurrent_h from the precomputed per-unit split.
    r_unsqueeze = K.expand_dims(r, axis=-1)
    recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)

    # Full GRU state update (kept from the original, but unused here):
    hh = tanh(x_h + recompute_recurrent_h)
    h = z * h_tm1 + (1 - z) * hh
    # Only the reset gate r is returned.
    return r
def __softmax_cosine_loss(y_true, y_pred, centers):
    y_true_value = K.argmax(y_true)

    # Numerator term: cosine distance of each sample to its own class center.
    base_loss = K.variable(0.0)
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        indices = tf.where(tf.equal(y_true_value, label))
        pred_per_class = tf.gather_nd(y_pred, indices=indices)
        distances_per_class = __cosine(pred_per_class, center)
        class_sum = K.sum(distances_per_class, axis=-1)
        base_loss = tf.add(base_loss, class_sum)

    # Denominator term: log of summed exp(-distance) over all class centers.
    base_distances = K.zeros_like(y_true_value, dtype='float32')
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        distances_with_all_classes = __cosine(y_pred, center)
        exp_distances = K.exp(-distances_with_all_classes)
        base_distances = tf.add(base_distances, exp_distances)
    log_distances = K.log(base_distances)
    sum_on_batch = K.sum(log_distances)

    return base_loss + sum_on_batch
def do_2d_pooling(feature_matrix, stride_length_px=2,
                  pooling_type_string=MAX_POOLING_TYPE_STRING):
    """Pools 2-D feature maps.

    m = number of rows after pooling
    n = number of columns after pooling

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must
        be M x N x C or 1 x M x N x C.
    :param stride_length_px: Stride length (pixels).  The pooling window will
        move by this many rows or columns at a time as it slides over each
        input feature map.
    :param pooling_type_string: Pooling type (must be accepted by
        `_check_pooling_type`).
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions
        will be 1 x m x n x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 2)
    _check_pooling_type(pooling_type_string)

    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)

    feature_tensor = K.pool2d(
        x=K.variable(feature_matrix), pool_mode=pooling_type_string,
        pool_size=(stride_length_px, stride_length_px),
        strides=(stride_length_px, stride_length_px), padding='valid',
        data_format='channels_last')

    return feature_tensor.numpy()
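# Hypothetical usage sketch (assumes this module and its error_checking
# dependency are importable): 2 x 2 pooling with stride 2 halves each
# spatial dimension of a 4 x 4 single-channel map (max pooling by default).
import numpy

feature_matrix = numpy.arange(16, dtype=float).reshape(4, 4, 1)
pooled_matrix = do_2d_pooling(feature_matrix, stride_length_px=2)
print(pooled_matrix.shape)       # (1, 2, 2, 1)
print(pooled_matrix[0, ..., 0])  # [[ 5.  7.] [13. 15.]]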
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: K.repeat_elements and tf.split don't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def _model_setup(train_config, metrics, resume_training=None):
    if resume_training:
        model = load_model(os.path.join(resume_training))
        train_config.train.initial_epoch = int(resume_training.split('_')[-1])
    else:
        model = _model_init(train_config)
        train_config.train.initial_epoch = 0

    # Setup optimizers
    optimizer = _get_optimizer(train_config.optimizer,
                               train_config.train.lr.init_lr)
    optimizer_cat = _get_optimizer(train_config.optimizer, 0.01)

    if isinstance(model, list):
        if train_config.optimizer.daug_invariance_params['pct_loss'] + \
           train_config.optimizer.class_invariance_params['pct_loss'] == 1.:
            model_cat = model[1]
            model_cat.compile(loss=train_config.optimizer.loss,
                              optimizer=optimizer_cat,
                              metrics=metrics)
            model = model[0]
        else:
            model = model[0]
            model_cat = None
    else:
        model_cat = None

    # Get invariance layers
    inv_outputs = [output_name for output_name in model.output_names
                   if '_inv' in output_name]
    daug_inv_outputs = [output_name for output_name in inv_outputs
                        if 'daug_' in output_name]
    class_inv_outputs = [output_name for output_name in inv_outputs
                         if 'class_' in output_name]
    mean_inv_outputs = [output_name for output_name in inv_outputs
                        if 'mean_' in output_name]
    train_config.optimizer.n_inv_layers = len(daug_inv_outputs)

    if train_config.optimizer.invariance:
        # Determine loss weights for each invariance loss at each layer
        assert train_config.optimizer.daug_invariance_params['pct_loss'] + \
               train_config.optimizer.class_invariance_params['pct_loss'] <= 1.

        no_inv_layers = []
        if FLAGS.no_inv_last_layer:
            no_inv_layers.append(len(daug_inv_outputs))
        if FLAGS.no_inv_first_layer:
            no_inv_layers.append(0)
        if FLAGS.no_inv_layers:
            no_inv_layers = [int(layer) - 1 for layer in FLAGS.no_inv_layers]

        daug_inv_loss_weights = get_invariance_loss_weights(
            train_config.optimizer.daug_invariance_params,
            train_config.optimizer.n_inv_layers,
            no_inv_layers)
        class_inv_loss_weights = get_invariance_loss_weights(
            train_config.optimizer.class_invariance_params,
            train_config.optimizer.n_inv_layers,
            no_inv_layers)
        mean_inv_loss_weights = np.zeros(len(mean_inv_outputs))
        loss_weight_cat = 1.0 - (np.sum(daug_inv_loss_weights) +
                                 np.sum(class_inv_loss_weights))

        if 'decay_rate' in train_config.optimizer.daug_invariance_params or \
           'decay_rate' in train_config.optimizer.class_invariance_params:
            # Loss weights as Keras variables, so they can be decayed during
            # training.
            loss_weights_tensors = {
                'softmax': K.variable(loss_weight_cat, name='w_softmax')}
            for output, weight in zip(daug_inv_outputs, daug_inv_loss_weights):
                loss_weights_tensors[output] = K.variable(
                    weight, name='w_{}'.format(output))
            for output, weight in zip(class_inv_outputs,
                                      class_inv_loss_weights):
                loss_weights_tensors[output] = K.variable(
                    weight, name='w_{}'.format(output))
            for output, weight in zip(mean_inv_outputs, mean_inv_loss_weights):
                loss_weights_tensors[output] = K.variable(
                    weight, name='w_{}'.format(output))

            loss = {'softmax': weighted_loss(train_config.optimizer.loss,
                                             loss_weights_tensors['softmax'])}
            for output in daug_inv_outputs:
                loss[output] = weighted_loss(invariance_loss,
                                             loss_weights_tensors[output])
            for output in class_inv_outputs:
                loss[output] = weighted_loss(invariance_loss,
                                             loss_weights_tensors[output])
            for output in mean_inv_outputs:
                loss[output] = weighted_loss(mean_loss,
                                             loss_weights_tensors[output])
            loss_weights = [1.] * len(model.outputs)
        else:
            loss = {'softmax': train_config.optimizer.loss}
            for output in daug_inv_outputs:
                loss[output] = invariance_loss
            for output in class_inv_outputs:
                loss[output] = invariance_loss
            for output in mean_inv_outputs:
                loss[output] = mean_loss
            if 'output_inv' in model.outputs:
                loss.update({'output_inv': None})

            loss_weights = {'softmax': loss_weight_cat}
            for output, loss_weight in zip(daug_inv_outputs,
                                           daug_inv_loss_weights):
                loss_weights[output] = loss_weight
            for output, loss_weight in zip(class_inv_outputs,
                                           class_inv_loss_weights):
                loss_weights[output] = loss_weight
            for output, loss_weight in zip(mean_inv_outputs,
                                           mean_inv_loss_weights):
                loss_weights[output] = loss_weight
            loss_weights_tensors = None

        metrics_dict = {'softmax': metrics}
        model.compile(loss=loss,
                      loss_weights=loss_weights,
                      optimizer=optimizer,
                      metrics=metrics_dict)
    else:
        model.compile(loss=train_config.optimizer.loss,
                      optimizer=optimizer,
                      metrics=metrics)
        loss_weights_tensors = None

    # Change metrics names
    # NOTE: This fails because model has no attribute metrics_names in newer
    # TF/Keras versions.
    # model = change_metrics_names(model, train_config.optimizer.invariance)

    if model_cat:
        model_cat = change_metrics_names(model_cat, False)

    return model, model_cat, loss_weights_tensors
import tensorflow.compat.v1 as tf
import numpy as np
import tensorflow.compat.v1.keras as keras
import tensorflow.compat.v1.keras.backend as K

tf.disable_v2_behavior()  # This code was written for TF1.


def custom_softmax(x):
    # Subtract the row-wise max for numerical stability.
    m = tf.reduce_max(x, 1)
    x = x - m
    e = tf.exp(x)
    return e / tf.reduce_sum(e, -1)


a = np.random.randn(1, 1000)

tfy = tf.nn.softmax(a)
ky = keras.activations.softmax(K.variable(a))
tfc = custom_softmax(a)

session = K.get_session()
tfy_ = session.run(tfy)
ky_ = session.run(ky)
tfc_ = session.run(tfc)

print("tf vs k", np.abs(tfy_ - ky_).sum())
print("tf vs custom", np.abs(tfy_ - tfc_).sum())
print("custom vs k", np.abs(tfc_ - ky_).sum())