def __loss(y_true, y_pred):
    kernel_cs_forward, kernel_cs_backward = [], []
    for (forward, backward) in layers:
        kernel_c_forward = forward.cell.trainable_weights[1][:, rnn_units * 2:rnn_units * 3]
        kernel_c_backward = backward.cell.trainable_weights[1][:, rnn_units * 2:rnn_units * 3]
        kernel_cs_forward.append(K.reshape(kernel_c_forward, (rnn_units * rnn_units,)))
        kernel_cs_backward.append(K.reshape(kernel_c_backward, (rnn_units * rnn_units,)))
    phi_forward = K.stack(kernel_cs_forward)
    phi_backward = K.stack(kernel_cs_backward)
    loss_sim_forward = K.sum(K.square(
        K.dot(phi_forward, K.transpose(phi_forward)) - K.eye(len(layers))))
    loss_sim_backward = K.sum(K.square(
        K.dot(phi_backward, K.transpose(phi_backward)) - K.eye(len(layers))))
    loss_cat = keras.losses.categorical_crossentropy(y_true, y_pred)
    return loss_cat + lmbd * (loss_sim_forward + loss_sim_backward)
def free_running(h, states):
    prev_generated_output = initial_states[0][1:, :, :]
    prev_sampled_output = prev_generated_output

    # switching from (batch_size, previous_layer_input|true_input, output_dim)
    # to (previous_layer_input|true_input, batch_size, output_dim)
    axes = [1, 0] + list(range(2, K.ndim(h)))
    h = K.permute_dimensions(h, axes)
    prev_layer_input = h[0:1, :, :]

    if self.implementation == 0:
        x_z = prev_layer_input[0, :, :self.units]
        x_r = prev_layer_input[0, :, self.units: 2 * self.units]
        x_h = prev_layer_input[0, :, 2 * self.units:]

    z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                              self.recurrent_kernel_z))
    r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                              self.recurrent_kernel_r))
    hh = self.activation(x_h +
                         K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h) +
                         K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

    output = z * h_tm1 + (1. - z) * hh
    final_output = self.output_sampling(output, random_cutoff_vec)
    return K.stack([output, final_output])
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes."""
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        boxes, box_confidence, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    nms_index = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
def teacher_forced(h, states):
    # switching from (batch_size, previous_layer_input|true_input, output_dim)
    # to (previous_layer_input|true_input, batch_size, output_dim)
    axes = [1, 0] + list(range(2, K.ndim(h)))
    h = K.permute_dimensions(h, axes)
    prev_layer_input = h[0:1, :, :]
    true_input = h[1:, :, :self.units]  # this should correspond to true input
    prev_sampled_output = true_input

    if self.implementation == 0:
        x_z = prev_layer_input[0, :, :self.units]
        x_r = prev_layer_input[0, :, self.units: 2 * self.units]
        x_h = prev_layer_input[0, :, 2 * self.units:]
    else:
        # str() added so int implementation values concatenate cleanly
        raise ValueError('Implementation type ' + str(self.implementation) +
                         ' is invalid')

    z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0],
                                              self.recurrent_kernel_z))
    r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1],
                                              self.recurrent_kernel_r))
    hh = self.activation(x_h +
                         K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h) +
                         K.dot(r * prev_sampled_output, self.recurrent_kernel_y))

    output = z * h_tm1 + (1. - z) * hh
    return K.stack([output, output])
def scale_boxes(boxes, image_shape):
    """Scales the predicted boxes in order to be drawable on the image."""
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims
    return boxes
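# Hedged usage sketch for scale_boxes, assuming boxes hold normalized
# [y1, x1, y2, x2] corners; the example tensor below is illustrative,
# not from the original source.
import numpy as np
from keras import backend as K

boxes_norm = K.constant(np.array([[0.1, 0.2, 0.5, 0.6]], dtype='float32'))
scaled = scale_boxes(boxes_norm, image_shape=(720.0, 1280.0))
print(K.eval(scaled))  # -> [[ 72. 256. 360. 768.]]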
def get_initial_states(self, x):
    # build an all-zero tensor of shape [(samples, output_dim), (samples, output_dim)]
    initial_state = K.zeros_like(x)  # (samples, timesteps, input_dim)
    initial_state = K.sum(initial_state, axis=1)  # (samples, input_dim)
    reducer = K.random_uniform((self.input_dim, self.units))
    reducer = reducer / K.exp(reducer)
    initial_state = K.dot(initial_state, reducer)  # (samples, output_dim)
    initial_states = [K.stack([initial_state, initial_state])
                      for _ in range(len(self.states))]
    return initial_states
def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)
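# Hedged usage sketch: mean_iou can be passed to model.compile as a metric
# for a binary segmentation model; `build_unet` is a hypothetical model
# constructor, not part of the original source.
model = build_unet(input_shape=(128, 128, 1))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[mean_iou])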
def output_sampling(self, output, rand_matrix):
    # Generates a sampled selection based on raw output state vector
    # Creates a cdf vector and compares against a randomly generated vector
    # Requires a pre-generated rand_matrix (i.e. generated outside step function)

    sampled_output = output / K.sum(output, axis=-1, keepdims=True)  # (batch_size, self.units)
    mod_sampled_output = sampled_output / K.exp(self.temperature)
    norm_exp_sampled_output = mod_sampled_output / K.sum(mod_sampled_output,
                                                         axis=-1, keepdims=True)

    cdf_vector = K.cumsum(norm_exp_sampled_output, axis=-1)
    cdf_minus_vector = cdf_vector - norm_exp_sampled_output

    rand_matrix = K.stack([rand_matrix], axis=0)
    rand_matrix = K.stack([rand_matrix], axis=2)

    compared_greater_output = K.cast(K.greater(cdf_vector, rand_matrix),
                                     dtype='float32')
    compared_lesser_output = K.cast(K.less(cdf_minus_vector, rand_matrix),
                                    dtype='float32')

    final_output = compared_greater_output * compared_lesser_output
    return final_output
def test_keras_unstack_hack():
    y_true_np = np.random.random([1, 3, 2])
    y_true_np[:, :, 0] = 0
    y_true_np[:, :, 1] = 1
    y_true_keras = K.variable(y_true_np)
    y, u = wtte._keras_unstack_hack(y_true_keras)
    y_true_keras_new = K.stack([y, u], axis=-1)
    np.testing.assert_array_equal(K.eval(y_true_keras_new), y_true_np)
def _make_regular_grids(self, batch_size, height, width):
    # making a single regular grid
    x_linspace = K_linspace(-1., 1., width)
    y_linspace = K_linspace(-1., 1., height)
    x_coordinates, y_coordinates = K_meshgrid(x_linspace, y_linspace)
    x_coordinates = K.flatten(x_coordinates)
    y_coordinates = K.flatten(y_coordinates)
    ones = K.ones_like(x_coordinates)
    grid = K.concatenate([x_coordinates, y_coordinates, ones], 0)

    # repeating grids for each batch
    grid = K.flatten(grid)
    grids = K.tile(grid, K.stack([batch_size]))
    return K.reshape(grids, (batch_size, 3, height * width))
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None,
                           timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
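# Minimal usage sketch for time_distributed_dense, assuming a TensorFlow
# backend; the shapes below are illustrative.
import numpy as np
from keras import backend as K

x = K.variable(np.random.random((4, 10, 8)))  # (batch, timesteps, input_dim)
w = K.variable(np.random.random((8, 16)))     # (input_dim, output_dim)
y = time_distributed_dense(x, w, input_dim=8, output_dim=16, timesteps=10)
print(K.int_shape(y))  # -> (None, None, 16)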
def build_siamese_model():
    inception_model = faceRecoModel((96, 96, 3))

    anchor_input = Input((96, 96, 3), name="anchor_input")
    anchor_output = inception_model(anchor_input)

    positive_input = Input((96, 96, 3), name="positive_input")
    positive_output = inception_model(positive_input)

    negative_input = Input((96, 96, 3), name="negative_input")
    negative_output = inception_model(negative_input)

    # loss = merge([anchor_output, positive_output, negative_output],
    #              mode=triplet_loss, name='loss', output_shape=(1,))
    # final = concatenate([anchor_output, positive_output, negative_output], axis=-1)
    final = Lambda(lambda vects: K.stack(vects, axis=1),
                   name='final')([anchor_output, positive_output, negative_output])

    final_model = Model(inputs=[anchor_input, positive_input, negative_input],
                        outputs=final)
    return final_model
def make_regular_grids(self, batch_size, resampled_size):
    x_linear_space = tf.lin_space(-1.0, 1.0, resampled_size[1])
    y_linear_space = tf.lin_space(-1.0, 1.0, resampled_size[0])
    z_linear_space = tf.lin_space(-1.0, 1.0, resampled_size[2])
    x_coords, y_coords, z_coords = tf.meshgrid(x_linear_space,
                                               y_linear_space,
                                               z_linear_space)
    x_coords = K.flatten(x_coords)
    y_coords = K.flatten(y_coords)
    z_coords = K.flatten(z_coords)
    ones = K.ones_like(x_coords)
    regular_grid = K.concatenate([x_coords, y_coords, z_coords, ones], axis=0)
    regular_grid = K.flatten(regular_grid)
    regular_grids = K.tile(regular_grid, K.stack([batch_size]))
    regular_grids = K.reshape(
        regular_grids,
        (batch_size, 4,
         resampled_size[0] * resampled_size[1] * resampled_size[2]))
    return regular_grids
def triplet_net(base_model, input_shape=(96, 96, 3)):
    """Define the triplet network."""
    # define input: anchor, positive, negative images
    anchor = Input(shape=input_shape, name='anchor_input')
    positive = Input(shape=input_shape, name='positive_input')
    negative = Input(shape=input_shape, name='negative_input')

    # extract vector representations using the CNN base model
    anc_vec = base_model(anchor)
    pos_vec = base_model(positive)
    neg_vec = base_model(negative)

    # stack outputs
    stacks = Lambda(lambda x: K.stack(x, axis=1),
                    name='output')([anc_vec, pos_vec, neg_vec])

    # define inputs and outputs
    inputs = [anchor, positive, negative]
    outputs = stacks

    # define the triplet model
    model = Model(inputs=inputs, outputs=outputs, name='triplet_net')
    return model
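# Hedged usage sketch for triplet_net: wrap an embedding CNN and train
# against a triplet loss on the stacked outputs. `build_embedding_cnn`
# and `triplet_loss` are hypothetical stand-ins, not defined here.
base = build_embedding_cnn(input_shape=(96, 96, 3))  # returns a Keras Model
model = triplet_net(base)
model.compile(optimizer='adam', loss=triplet_loss)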
def multibox_loss(y_true, y_pred):
    ground_boxes = y_true[:, :, :4]
    locs = y_pred[:, :, :4]
    confs = y_pred[:, :, 4]

    min_losses = []
    for b in range(batchSize):
        batch_gt = ground_boxes[b]
        batch_preds = locs[b] + pboxes
        batch_confs = K.clip(confs[b], 0.0001, 0.9999)
        conf_sum = K.sum(K.log(1 - batch_confs))
        conf_loss = -conf_sum + K.log(1 - batch_confs) - K.log(batch_confs)
        loc_loss = 0.5 * K.sum(K.square(batch_gt - batch_preds), axis=1)
        min_loss = K.min(conf_loss + alpha * loc_loss)
        min_losses.append(min_loss)

    min_losses_tensor = K.stack(min_losses)
    return min_losses_tensor
def get_horizontal_rnn_inputs(self, image_patches, forward):
    """Creates horizontal rnn inputs in dimensions
    (num_patches, batch_size, rnn_input_feature_dim)
    num_patches: image_patches_height * image_patches_width
    """
    horizontal_rnn_inputs = []
    _, image_patches_height, image_patches_width, feature_dim = image_patches.shape

    if forward:
        for i in range(image_patches_height):
            for j in range(image_patches_width):
                horizontal_rnn_inputs.append(image_patches[:, i, j, :])
    else:
        for i in range(image_patches_height - 1, -1, -1):
            for j in range(image_patches_width - 1, -1, -1):
                horizontal_rnn_inputs.append(image_patches[:, i, j, :])

    horizontal_rnn_inputs = K.stack(horizontal_rnn_inputs)
    horizontal_rnn_inputs = keras.backend.permute_dimensions(
        horizontal_rnn_inputs, (1, 0, 2))
    return horizontal_rnn_inputs
def __call__(self, k_true, alpha_pred):
    # alpha_pred: [sample, dict]
    alpha_pred_T = K.transpose(alpha_pred)  # [dict, sample]
    dot = K.concatenate([K.dot(g, alpha_pred_T) for g in self.gram_sliced],
                        axis=0)
    quad = K.batch_dot(alpha_pred_T, dot, axes=0)
    linear = K.batch_dot(k_true, alpha_pred, axes=1)
    alpha_g_norm = [
        K.sqrt(K.sum(K.square(K.gather(alpha_pred_T, g)), axis=0) + K.epsilon())
        for g in self.group_indices
    ]
    reg = K.sum(K.stack(alpha_g_norm), axis=0)
    # alpha_g = K.stack([K.gather(alpha_pred_T, g) for g in self.group_indices])
    #     # [group, dict/group, sample]
    # alpha_g_norm = K.sqrt(K.sum(K.square(alpha_g), axis=1) + K.epsilon())  # [group, sample]
    # reg = K.sum(alpha_g_norm, axis=0)
    return K.mean(.5 * K.flatten(quad) - K.flatten(linear) + self.lmbd * reg)
def trunc_dynamics(self, y_trunc, params):
    # x_mean & eigenvector
    xm, w = K.constant(params['xm']), K.constant(params['W'])
    # Linear operator & offset operator on chi
    L, b = K.constant(params['L_y']), K.constant(params['b_y'])
    # standardized y
    ys = K.constant(params['y_std'])

    # recover original state representations and calculate nonlinear terms
    x = xm + K.dot(y_trunc * ys[:self.dim], K.transpose(w[:, :self.dim]))
    NX_list = []
    NX_list.append(-x[:, 0] * x[:, 2])
    NX_list.append(x[:, 0] * x[:, 1])
    # NX_list.append(-CdV.alpha[0]*x[:,0]*x[:,2] - CdV.delta[0]*x[:,3]*x[:,5])
    # NX_list.append(CdV.alpha[0]*x[:,0]*x[:,1] + CdV.delta[0]*x[:,3]*x[:,4])
    # NX_list.append(CdV.epsilon*(x[:,1]*x[:,5] - x[:,2]*x[:,4]))
    # NX_list.append(-CdV.alpha[1]*x[:,0]*x[:,5] - CdV.delta[1]*x[:,2]*x[:,3])
    # NX_list.append(CdV.alpha[1]*x[:,0]*x[:,4] + CdV.delta[1]*x[:,3]*x[:,1])
    NX = K.stack(NX_list, axis=-1)

    dydt_trunc = K.dot(y_trunc, L[:self.dim]) + b + K.dot(NX, w[1:]) / ys
    return dydt_trunc[:, :self.dim]
def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.trainable_weights

    constraint_grads = K.gradients(f, params)
    xs = tuple([K.placeholder(shape=K.int_shape(p), ndim=K.ndim(p))
                for p in params])

    tmp = [K.sum(g * x) for g, x in zip(constraint_grads, xs)]
    Hx_plain_splits = K.gradients(K.sum(K.stack(tmp)), params)

    print(type(inputs))
    print(type(xs))

    self.hx_fun = K.function(
        inputs=[K.learning_phase(), inputs + xs],
        outputs=Hx_plain_splits,
    )
    return self.hx_fun
def yolo_head(feats):
    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))
    box_xy = (feats[..., :2] + conv_index) / conv_dims * 448
    box_wh = feats[..., 2:4] * 448

    return box_xy, box_wh
def _compute_valid_seed_region(self, height, width):
    positions = K.concatenate([
        K.expand_dims(K.tile(K.expand_dims(K.arange(height), axis=1),
                             [1, width]), axis=-1),
        K.expand_dims(K.tile(K.expand_dims(K.arange(width), axis=0),
                             [height, 1]), axis=-1),
    ], axis=-1)
    half_block_size = self.block_size // 2
    valid_seed_region = K.switch(
        K.all(
            K.stack(
                [
                    positions[:, :, 0] >= half_block_size,
                    positions[:, :, 1] >= half_block_size,
                    positions[:, :, 0] < height - half_block_size,
                    positions[:, :, 1] < width - half_block_size,
                ],
                axis=-1,
            ),
            axis=-1,
        ),
        K.ones((height, width)),
        K.zeros((height, width)),
    )
    return K.expand_dims(K.expand_dims(valid_seed_region, axis=0), axis=-1)
def split_heads_2d(self, ip):
    tensor_shape = K.shape(ip)

    # batch, height, width, channels for axis = -1
    tensor_shape = [tensor_shape[i] for i in range(len(self._shape))]

    batch = tensor_shape[0]
    height = tensor_shape[1]
    width = tensor_shape[2]
    channels = tensor_shape[3]

    # Save the spatial tensor dimensions
    self._batch = batch
    self._height = height
    self._width = width

    ret_shape = K.stack([batch, height, width,
                         self.num_heads, channels // self.num_heads])
    split = K.reshape(ip, ret_shape)
    transpose_axes = (0, 3, 1, 2, 4)
    split = K.permute_dimensions(split, transpose_axes)

    return split
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x):
    print("call")
    # Calculate the pairwise distances between the codewords and the feature vectors
    x_square = K.sum(x ** 2, axis=3, keepdims=True)
    y_square = K.sum(self.V ** 2, axis=2, keepdims=True)
    # print(K.conv2d(self.V, x, strides=(1, 1), padding='valid').shape)
    dists = x_square + y_square - 2 * K.conv2d(
        x, self.V, strides=(1, 1), padding='valid')
    dists = K.maximum(dists, 0)

    # Quantize the feature vectors
    quantized_features = K.softmax(-dists / (self.sigmas ** 2))

    # Compile the histogram
    if self.spatial_level == 0:
        histogram = K.mean(quantized_features, [1, 2])
    elif self.spatial_level == 1:
        shape = K.shape(quantized_features)
        mid_1 = K.cast(shape[1] / 2, 'int32')
        mid_2 = K.cast(shape[2] / 2, 'int32')
        histogram1 = K.mean(quantized_features[:, :mid_1, :mid_2, :], [1, 2])
        histogram2 = K.mean(quantized_features[:, mid_1:, :mid_2, :], [1, 2])
        histogram3 = K.mean(quantized_features[:, :mid_1, mid_2:, :], [1, 2])
        histogram4 = K.mean(quantized_features[:, mid_1:, mid_2:, :], [1, 2])
        histogram = K.stack([histogram1, histogram2, histogram3, histogram4], 1)
        histogram = K.reshape(histogram, (-1, 4 * self.N_k))
    else:
        # No other spatial level is currently supported (it is trivial to extend the code)
        assert False

    # Simple trick to avoid rescaling issues
    return histogram * self.N_k
def call(self, inputs, output_shape=None):
    updates, mask = inputs[0], inputs[1]
    with K.tf.variable_scope(self.name):
        mask = K.cast(mask, 'int32')
        input_shape = K.tf.shape(updates, out_type='int32')
        # calculate new shape
        if output_shape is None:
            output_shape = (
                input_shape[0],
                input_shape[1] * self.size[0],
                input_shape[2] * self.size[1],
                input_shape[3])
        self.output_shape1 = output_shape

        # calculate indices for batch, height, width and feature maps
        one_like_mask = K.ones_like(mask, dtype='int32')
        batch_shape = K.concatenate([[input_shape[0]], [1], [1], [1]], axis=0)
        batch_range = K.reshape(
            K.tf.range(output_shape[0], dtype='int32'),
            shape=batch_shape)
        b = one_like_mask * batch_range
        y = mask // (output_shape[2] * output_shape[3])
        x = (mask // output_shape[3]) % output_shape[2]
        feature_range = K.tf.range(output_shape[3], dtype='int32')
        f = one_like_mask * feature_range

        # transpose indices & reshape update values to one dimension
        updates_size = K.tf.size(updates)
        indices = K.transpose(K.reshape(
            K.stack([b, y, x, f]),
            [4, updates_size]))
        print(K.tf.size(indices))
        values = K.reshape(updates, [updates_size])
        ret = K.tf.scatter_nd(indices, values, output_shape)
        return ret
def _to_normal2d(output_batch) -> ds.MultivariateNormalTriL:
    """
    :param output_batch: (n_samples, 5)
    :return
    """
    # mean of x and y
    x_mean = Lambda(lambda o: o[:, 0])(output_batch)
    y_mean = Lambda(lambda o: o[:, 1])(output_batch)

    # std of x and y
    # std must be 0 or positive
    x_std = Lambda(lambda o: K.exp(o[:, 2]))(output_batch)
    y_std = Lambda(lambda o: K.exp(o[:, 3]))(output_batch)

    # correlation coefficient
    # correlation coefficient range is [-1, 1]
    cor = Lambda(lambda o: K.tanh(o[:, 4]))(output_batch)

    loc = Concatenate()([
        Lambda(lambda x_mean: K.expand_dims(x_mean, 1))(x_mean),
        Lambda(lambda y_mean: K.expand_dims(y_mean, 1))(y_mean)
    ])

    x_var = Lambda(lambda x_std: K.square(x_std))(x_std)
    y_var = Lambda(lambda y_std: K.square(y_std))(y_std)
    xy_cor = Multiply()([x_std, y_std, cor])
    cov = Lambda(lambda inputs: K.stack(inputs, axis=0))(
        [x_var, xy_cor, xy_cor, y_var])
    cov = Lambda(lambda cov: K.permute_dimensions(cov, (1, 0)))(cov)
    cov = Reshape((2, 2))(cov)

    scale_tril = Lambda(lambda cov: tf.cholesky(cov))(cov)
    mvn = ds.MultivariateNormalTriL(loc, scale_tril)
    return mvn
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes."""
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(box_confidence, boxes,
                                               box_class_probs,
                                               threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    # comment the below line for video.
    # boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    # (a second, redundant non_max_suppression call that immediately
    # overwrote this result has been dropped)
    nms_index = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                             iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
def call(self, input_tensor, training=None):
    input_shape = K.shape(input_tensor)
    _, in_height, in_width, _, _ = input_tensor.get_shape()
    input_transposed = tf.transpose(input_tensor, [0, 3, 1, 2, 4])
    input_tensor_reshaped = K.reshape(input_transposed, [
        input_shape[0] * input_shape[3],
        input_shape[1], input_shape[2], self.input_num_atoms])
    input_tensor_reshaped.set_shape((None, in_height.value, in_width.value,
                                     self.input_num_atoms))

    conv = K.conv2d(input_tensor_reshaped, self.W,
                    (self.strides, self.strides),
                    padding=self.padding, data_format='channels_last')

    votes_shape = K.shape(conv)
    _, conv_height, conv_width, _ = conv.get_shape()
    # Reshape back to 6D by splitting first dimension to batch and input_dim
    # and splitting last dimension to output_dim and output_atoms.
    votes = K.reshape(conv, [input_shape[0], input_shape[3],
                             votes_shape[1], votes_shape[2],
                             self.num_capsule, self.num_atoms])
    votes.set_shape((None, self.input_num_capsule,
                     conv_height.value, conv_width.value,
                     self.num_capsule, self.num_atoms))

    logit_shape = K.stack([
        input_shape[0], input_shape[3],
        votes_shape[1], votes_shape[2],
        self.num_capsule])
    biases_replicated = K.tile(self.b, [votes_shape[1], votes_shape[2], 1, 1])

    activations = _update_routing(
        votes=votes,
        biases=biases_replicated,
        logit_shape=logit_shape,
        num_dims=6,
        input_dim=self.input_num_capsule,
        output_dim=self.num_capsule,
        num_routing=self.routings,
        leaky=self.leaky_routing)
    return activations
def mAPmetric(true, pred):  # any shape can go - can't be a loss function
    tresholds = [0.5 + (i * .05) for i in range(10)]

    # flattened images (batch, pixels)
    true = K.batch_flatten(true)
    pred = K.batch_flatten(pred)
    pred = castF(K.greater(pred, 0.5))

    # total white pixels - (batch,)
    trueSum = K.sum(true, axis=-1)
    predSum = K.sum(pred, axis=-1)

    # has mask or not per image - (batch,)
    true1 = castF(K.greater(trueSum, 1))
    pred1 = castF(K.greater(predSum, 1))

    # to get images that have mask in both true and pred
    truePositiveMask = castB(true1 * pred1)

    # separating only the possible true positives to check iou
    testTrue = tf.boolean_mask(true, truePositiveMask)
    testPred = tf.boolean_mask(pred, truePositiveMask)

    # getting iou and threshold comparisons
    iou = iou_loss_core(testTrue, testPred)
    truePositives = [castF(K.greater(iou, tres)) for tres in tresholds]

    # mean of thresholds for true positives and total sum
    truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
    truePositives = K.sum(truePositives)

    # to get images that don't have mask in both true and pred
    trueNegatives = (1 - true1) * (1 - pred1)  # = 1 - true1 - pred1 + true1*pred1
    trueNegatives = K.sum(trueNegatives)

    return (truePositives + trueNegatives) / castF(K.shape(true)[0])
def _time_distributed_dense(self, x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
def call(self, inputs, mask=None):
    features = inputs[0]  # Shape: (None, num_nodes, num_features)
    A = inputs[1:]  # Shapes: (None, num_nodes, num_nodes)

    eye = A[0] * K.zeros(self.num_nodes, dtype='float32') + K.eye(
        self.num_nodes, dtype='float32')
    # eye = K.eye(self.num_nodes, dtype='float32')

    if self.consecutive_links:
        eye_len = eye.get_shape().as_list()[0]
        shifted = tf.concat((eye[-1:, :], eye[:-1, :]), axis=0)
        A.append(shifted)

    if self.backward_links:
        for i in range(len(A)):
            A.append(K.permute_dimensions(A[i], [0, 2, 1]))

    if self.self_links:
        A.append(eye)

    AHWs = list()
    for i in range(self.num_adjacency_matrices):
        if self.edge_weighting:
            features *= self.W_edges[i]
        HW = K.dot(features, self.W[i])  # Shape: (None, num_nodes, output_dim)
        AHW = K.batch_dot(A[i], HW)  # Shape: (None, num_nodes, num_features)
        AHWs.append(AHW)
    AHWs_stacked = K.stack(AHWs, axis=1)  # Shape: (None, num_supports, num_nodes, num_features)
    output = K.max(AHWs_stacked, axis=1)  # Shape: (None, num_nodes, output_dim)

    if self.bias:
        output += self.b
    return self.activation(output)
def call(self, input_tensor, training=None):
    input_transposed = tf.transpose(input_tensor, [4, 0, 1, 2, 3, 5])
    input_shape = K.shape(input_transposed)
    input_tensor_reshaped = K.reshape(input_transposed, [
        input_shape[0] * input_shape[1],
        self.input_height, self.input_width, self.input_depth,
        self.input_num_atoms])
    input_tensor_reshaped.set_shape((None, self.input_height, self.input_width,
                                     self.input_depth, self.input_num_atoms))

    conv = K.conv3d(input_tensor_reshaped, self.W,
                    (self.strides, self.strides, self.strides),
                    padding=self.padding, data_format='channels_last',
                    dilation_rate=(1, 1, 1))

    votes_shape = K.shape(conv)
    _, conv_height, conv_width, conv_depth, _ = conv.get_shape()

    votes = K.reshape(conv, [input_shape[1], input_shape[0],
                             votes_shape[1], votes_shape[2], votes_shape[3],
                             self.num_capsule, self.num_atoms])
    votes.set_shape((None, self.input_num_capsule,
                     conv_height.value, conv_width.value, conv_depth.value,
                     self.num_capsule, self.num_atoms))

    logit_shape = K.stack([
        input_shape[1], input_shape[0],
        votes_shape[1], votes_shape[2], votes_shape[3],
        self.num_capsule])
    biases_replicated = K.tile(self.b, [conv_height.value, conv_width.value,
                                        conv_depth.value, 1, 1])

    activations = update_routing(
        votes=votes,
        biases=biases_replicated,
        logit_shape=logit_shape,
        num_dims=7,
        input_dim=self.input_num_capsule,
        output_dim=self.num_capsule,
        num_routing=self.routings)
    return activations
def rot3d_from_euler(euler):
    # Convert Euler-Rodrigues angles to a 3D rotation matrix
    # (with order of rotation X-Y-Z)
    euler_x = Lambda(lambda x: x[:, :, 0])(euler)
    euler_y = Lambda(lambda x: x[:, :, 1])(euler)
    euler_z = Lambda(lambda x: x[:, :, 2])(euler)

    cx = Lambda(lambda x: K.cos(x))(euler_x)
    sx = Lambda(lambda x: K.sin(x))(euler_x)
    cy = Lambda(lambda x: K.cos(x))(euler_y)
    sy = Lambda(lambda x: K.sin(x))(euler_y)
    cz = Lambda(lambda x: K.cos(x))(euler_z)
    sz = Lambda(lambda x: K.sin(x))(euler_z)

    R11 = Lambda(lambda x: x[0] * x[1])([cy, cz])
    R12 = Lambda(lambda x: x[0] * x[1] * x[2] - x[3] * x[4])(
        [sx, sy, cz, cx, sz])
    R13 = Lambda(lambda x: x[0] * x[1] * x[2] + x[3] * x[4])(
        [cx, sy, cz, sx, sz])
    R21 = Lambda(lambda x: x[0] * x[1])([cy, sz])
    R22 = Lambda(lambda x: x[0] * x[1] * x[2] + x[3] * x[4])(
        [sx, sy, sz, cx, cz])
    R23 = Lambda(lambda x: x[0] * x[1] * x[2] - x[3] * x[4])(
        [cx, sy, sz, sx, cz])
    R31 = Lambda(lambda x: -x)(sy)
    R32 = Lambda(lambda x: x[0] * x[1])([sx, cy])
    R33 = Lambda(lambda x: x[0] * x[1])([cx, cy])

    print("R11 shape: " + str(R11.shape))
    R = Lambda(lambda x: K.stack(x, axis=-1))(
        [R11, R12, R13, R21, R22, R23, R31, R32, R33])
    print("R shape: " + str(R.shape))
    R = Reshape((-1, 3, 3))(R)
    print("R shape: " + str(R.shape))
    # exit(1)
    return R
def generate_conv_index_conv_dim(conv_height, conv_width):
    '''
    generate a grid mesh array such that element with index [0, i, j, 0, 2]
    is (x = j, y = i) in cg coordinate

    parameters
    ----------
    conv_height: height of conv features (number of rows)
    conv_width: width of conv features (number of cols)

    returns
    -------
    A tensor that is the grid mesh array described above. e.g.

        conv_index, conv_dims = generate_conv_index_conv_dim(9, 9)
        conv_index[:, 8, 2, :, :]  # returns Tensor([[[2., 8.]]])
    '''
    conv_dims = K.constant([conv_height, conv_width])
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = tf.tile(conv_height_index, [conv_dims[1]])

    conv_width_index = tf.tile(K.expand_dims(conv_width_index, 0),
                               [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.floatx())

    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.floatx())
    return conv_index, conv_dims
def get_relation_vectors(x):
    objects = []
    relations = []
    shape = K.int_shape(x)
    k = 25  # Hyperparameter which controls how many objects are considered
    keys = []

    # Get k unique random objects
    while k > 0:
        i = ra.randint(0, shape[1] - 1)
        j = ra.randint(0, shape[2] - 1)
        if not (i, j) in keys:
            keys.append((i, j))
            objects.append(x[:, i, j, :])
            k -= 1

    # Concatenate each pair of objects to form a relation vector
    for i in range(len(objects)):
        for j in range(i, len(objects)):
            relations.append(K.concatenate([objects[i], objects[j]], axis=1))

    # Restack objects into Keras tensor [batch, relation_ID, relation_vectors]
    return K.permute_dimensions(K.stack([r for r in relations], axis=0),
                                [1, 0, 2])
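# Hedged usage sketch: get_relation_vectors is meant to be wrapped in a
# Lambda layer over a conv feature map; `image_input` and the layer sizes
# are illustrative, not from the original source.
from keras.layers import Input, Conv2D, Lambda

image_input = Input(shape=(32, 32, 3))
features = Conv2D(24, 3, activation='relu')(image_input)  # (None, 30, 30, 24)
relations = Lambda(get_relation_vectors)(features)        # (None, 325, 48)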
def Rayleigh_Channel_test(self, x, H):
    """
    :param x:
    :param H:
    :return:
    """
    print('x_shape', x.shape)
    print('x[:,:,1]', K.shape(x[:, 1]))
    print('x', K.shape(x))
    print('H', K.shape(self.H))
    print('H[0,:]', K.shape(self.H[0, :]))

    real = H[0, :] * x[:, :, 0] - H[1, :] * x[:, :, 1]
    # complex multiplication: imag = Re(H)*Im(x) + Im(H)*Re(x);
    # the original doubled the first term, which looks like a typo
    imag = H[0, :] * x[:, :, 1] + H[1, :] * x[:, :, 0]

    noise_r = K.random_normal(K.shape(real), mean=0, stddev=self.noise_std)
    noise_i = K.random_normal(K.shape(imag), mean=0, stddev=self.noise_std)
    real = real + noise_r
    imag = imag + noise_i

    print('realshape', real.shape)
    print('imagshape', imag.shape)
    x = K.stack([real, imag], axis=2)
    x = tf.Session().run(x)
    print(x.shape)
    return x
def competitionMetric2(true, pred):  # any shape can go
    tresholds = [0.5 + (i * .05) for i in range(10)]

    # flattened images (batch, pixels)
    true = K.batch_flatten(true)
    pred = K.batch_flatten(pred)
    pred = castF(K.greater(pred, 0.5))

    # total white pixels - (batch,)
    trueSum = K.sum(true, axis=-1)
    predSum = K.sum(pred, axis=-1)

    # has mask or not per image - (batch,)
    true1 = castF(K.greater(trueSum, 1))
    pred1 = castF(K.greater(predSum, 1))

    # to get images that have mask in both true and pred
    truePositiveMask = castB(true1 * pred1)

    # separating only the possible true positives to check iou
    testTrue = tf.boolean_mask(true, truePositiveMask)
    testPred = tf.boolean_mask(pred, truePositiveMask)

    # getting iou and threshold comparisons
    iou = iou_loss_core(testTrue, testPred)
    truePositives = [castF(K.greater(iou, tres)) for tres in tresholds]

    # mean of thresholds for true positives and total sum
    truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
    truePositives = K.sum(truePositives)

    # to get images that don't have mask in both true and pred
    trueNegatives = (1 - true1) * (1 - pred1)  # = 1 - true1 - pred1 + true1*pred1
    trueNegatives = K.sum(trueNegatives)

    return (truePositives + trueNegatives) / castF(K.shape(true)[0])
def offline_triplet_network(cfg, base_model):
    """
    Constructs the triplet model for training deep metric learning with an
    offline mining strategy. In offline mining, three branches with shared
    architecture and weights process the triplet input (anchor, positive,
    negative). This function constructs the model which has three inputs for
    the triplets and outputs three feature vectors stacked together.

    Arguments:
        cfg - python dict that contains the configuration
              (check Deep_Metrics_Learning.ipynb)
        base_model - Keras.Model, the base model shared by all three
                     branches for offline mining

    Returns:
        model - Keras.Model, full triplet model that contains three parallel
                copies of the base model for offline mining
    """
    input_shape = (cfg['window_width'], 2 * cfg['num_of_joints'])

    # define input: query, positive, negative
    query = Input(shape=input_shape, name="query_input")
    positive = Input(shape=input_shape, name="positive_input")
    negative = Input(shape=input_shape, name="negative_input")

    # construct the base model for the anchor, positive and negative inputs.
    q_vec = base_model(query)
    p_vec = base_model(positive)
    n_vec = base_model(negative)

    # stack outputs - feature vectors for the anchor, positive and negative sequences.
    stacks = Lambda(lambda x: K.stack(x, axis=1), name="output")([q_vec, p_vec, n_vec])

    # Construct the triplet model.
    model = Model(inputs=[query, positive, negative], outputs=stacks,
                  name="triplet_network")
    return model
def call(self, inputs):
    # from keras.layers import Lambda
    print('inputs shape: ')
    print(inputs.shape)

    input_current = inputs
    all_outputs = [inputs]
    for i, layer in enumerate(self.conv_layers):
        output_current = layer(inputs=input_current)
        # output_current is a 3D tensor;
        # use a Lambda layer to ensure the output is a keras tensor after slicing
        linear_output, gated_output = self.half_slice(output_current)
        # linear_output = Lambda(lambda x: x[:, :, :self.filters],
        #                        output_shape=lambda x: (x[:-1]) + (self.filters,))(output_current)
        # gated_output = Lambda(lambda x: x[:, :, self.filters:],
        #                       output_shape=lambda x: (x[:-1]) + (self.filters,))(output_current)
        input_current = linear_output * self.gate_activation(gated_output)
        input_current._keras_shape = K.int_shape(linear_output)
        # output = input_current + inputs
        all_outputs.append(input_current + inputs)  # residual connection

    output = input_current + inputs
    if self.return_blocks:
        print('return all blocks...')
        output = K.stack(all_outputs, axis=1)
        output = K.squeeze(output, 2)

    print('output shape: ')
    print(output.shape)
    # if self.return_blocks:
    #     return all_outputs
    # else:
    return output
def yolo_head(feats, anchors, num_classes, n):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    # TODO: It works with +1, don't know why.
    box_xy = (box_xy + conv_index + 1) / conv_dims
    # TODO: Input layer size
    box_wh = box_wh * anchors_tensor / conv_dims / {0: 32, 1: 16, 2: 8}[n]
    return box_xy, box_wh, box_confidence, box_class_probs
def shift(shape, stride, anchors):
    """Produce shifted anchors based on shape of the map and stride size.

    Args
        shape: Shape to shift the anchors over. (h, w)
        stride: Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.

    Returns
        shifted_anchors: (fh * fw * num_anchors, 4)
    """
    shift_x = (K.arange(0, shape[1], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride
    shift_y = (K.arange(0, shape[0], dtype=K.floatx()) +
               K.constant(0.5, dtype=K.floatx())) * stride

    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shift_x = K.reshape(shift_x, [-1])
    shift_y = K.reshape(shift_y, [-1])

    # (4, fh * fw)
    shifts = K.stack([shift_x, shift_y, shift_x, shift_y], axis=0)
    # (fh * fw, 4)
    shifts = K.transpose(shifts)

    number_anchors = K.shape(anchors)[0]
    # number of base points = fh * fw
    k = K.shape(shifts)[0]

    # (k=fh*fw, num_anchors, 4)
    shifted_anchors = K.reshape(anchors, [1, number_anchors, 4]) + K.cast(
        K.reshape(shifts, [k, 1, 4]), K.floatx())
    # (k * num_anchors, 4)
    shifted_anchors = K.reshape(shifted_anchors, [k * number_anchors, 4])
    return shifted_anchors
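# Hedged usage sketch for shift: tile two illustrative base anchors over a
# 7x7 feature map with stride 32; the anchor values are made up.
import numpy as np
from keras import backend as K

base_anchors = K.constant(np.array([[-16., -16., 16., 16.],
                                    [-32., -32., 32., 32.]], dtype='float32'))
all_anchors = shift(shape=(7, 7), stride=32, anchors=base_anchors)
print(K.eval(all_anchors).shape)  # -> (98, 4), i.e. 7*7 locations x 2 anchors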
def get_gradients(self, losses, params):
    # NOTE: argument "losses" (list) instead of a single "loss"
    if isinstance(losses, list):
        # Gradients for each tower loss.
        # NOTE: K.gradients calls tf.gradients with
        # colocate_gradients_with_ops=True, thus each tf.gradients operation
        # should be collocated with its respective loss. We assume losses
        # to be located at different devices.
        tower_grads = [K.gradients(loss, params) for loss in losses]
        # Average gradients.
        # This should be a synchronization point (for sync SGD) and this
        # operation will be located according to the scope where the main
        # Model was defined - should be the parameter server device.
        # Average each parameter's gradient across towers so grads stays a
        # per-parameter list (a bare K.mean over the stack would collapse
        # everything to a scalar).
        grads = [K.mean(K.stack(g, axis=0), axis=0)
                 for g in zip(*tower_grads)]
    else:
        grads = K.gradients(losses, params)

    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
    return grads
def create(self):
    emb_anchor = Input(shape=(self.dimension,), name='anchor_input')
    emb_pos = Input(shape=(self.dimension,), name='pos_input')
    emb_neg = Input(shape=(self.dimension,), name='neg_input')

    # shared layers
    layer1 = Dense(128, activation='relu', name='first_emb_layer')
    layer2 = Dense(64, activation='relu', name='last_emb_layer')
    norm_layer = Lambda(l2Norm, name='norm_layer', output_shape=[64])

    encoded_emb = norm_layer(layer2(layer1(emb_anchor)))
    encoded_emb_pos = norm_layer(layer2(layer1(emb_pos)))
    encoded_emb_neg = norm_layer(layer2(layer1(emb_neg)))

    pos_dist = Lambda(euclidean_distance,
                      name='pos_dist')([encoded_emb, encoded_emb_pos])
    neg_dist = Lambda(euclidean_distance,
                      name='neg_dist')([encoded_emb, encoded_emb_neg])

    def cal_output_shape(input_shape):
        shape = list(input_shape[0])
        assert len(shape) == 2  # only valid for 2D tensors
        shape[-1] *= 2
        return tuple(shape)

    stacked_dists = Lambda(
        lambda vects: K.stack(vects, axis=1),
        name='stacked_dists',
        output_shape=cal_output_shape)([pos_dist, neg_dist])

    self.model = Model([emb_anchor, emb_pos, emb_neg], stacked_dists,
                       name='triple_siamese')
    self.model.compile(loss=triplet_loss, optimizer=Adam(lr=0.01),
                       metrics=[accuracy])
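# Minimal sketches of the helpers create() assumes (l2Norm,
# euclidean_distance, triplet_loss, accuracy). These are plausible
# margin-based definitions for a triplet setup, not the original
# author's code.
from keras import backend as K

def l2Norm(x):
    return K.l2_normalize(x, axis=-1)

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),
                            K.epsilon()))

def triplet_loss(_, y_pred, margin=1.0):
    # y_pred stacks [pos_dist, neg_dist] along axis 1
    return K.mean(K.maximum(y_pred[:, 0] - y_pred[:, 1] + margin, 0.0))

def accuracy(_, y_pred):
    # fraction of triplets where the positive is closer than the negative
    return K.mean(K.cast(y_pred[:, 0] < y_pred[:, 1], K.floatx()))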
def call(self, inputs, mask=None):
    if type(inputs) is not list or len(inputs) != 2:
        raise Exception('terminal gru runs on list of length 2')

    X = inputs[0]
    true_seq = inputs[1]

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    # preprocessing makes input into right form for gpu/cpu settings
    # from original GRU code
    recurrent_dropout_constants = self.get_constants(X)[0]
    preprocessed_input = self.preprocess_input(X)

    #################
    # Section for index matching of true inputs
    #################
    # Basically, we need to add an extra timestep of just 0s for
    # predicting the first timestep output

    axes = [1, 0] + list(range(2, K.ndim(true_seq)))
    true_seq = K.permute_dimensions(true_seq, axes)
    zeros = K.zeros_like(true_seq[:1, :, :])

    # add a column of zeros, remove last element
    true_seq = K.concatenate(
        [zeros, true_seq[:K.int_shape(true_seq)[0] - 1, :, :]], axis=0)
    shifted_raw_inputs = K.permute_dimensions(true_seq, axes)

    # concatenate to have same dimension as preprocessed inputs (3 * output_dim)
    # only for self.implementation = 0?
    shifted_raw_inputs = K.concatenate([shifted_raw_inputs,
                                        shifted_raw_inputs,
                                        shifted_raw_inputs], axis=2)

    all_inputs = K.stack([preprocessed_input, shifted_raw_inputs])
    num_dim = K.ndim(all_inputs)
    axes = [1, 2, 0] + list(range(3, num_dim))
    all_inputs = K.permute_dimensions(all_inputs, axes)

    # If not using true sequence, want to feed in a tensor of zeros instead.
    zeros_input_seq = K.zeros_like(preprocessed_input)
    test_phase_all_inputs = K.stack([preprocessed_input, zeros_input_seq])
    test_phase_all_inputs = K.permute_dimensions(test_phase_all_inputs, axes)

    all_inputs = K.in_train_phase(all_inputs, test_phase_all_inputs)

    last_output, outputs, states = sampled_rnn(
        self.step,
        all_inputs,
        initial_states,
        self.units,
        self.rnd_seed,
        go_backwards=self.go_backwards,
        rec_dp_constants=recurrent_dropout_constants,
        mask=None)

    if self.return_sequences:
        return outputs
    else:
        return last_output
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def output_lambda(x, init_alpha=1.0, max_beta_value=5.0,
                  alpha_kernel_scalefactor=None):
    """Elementwise (Lambda) computation of alpha and regularized beta.

        - Alpha:
            (activation)
            Exponential units seem to give faster training than the
            original paper's softplus units. Makes sense due to the
            logarithmic effect of change in alpha.
            (initialization)
            To get faster training and fewer exploding gradients,
            initialize alpha to be around its scale when beta is around 1.0,
            approx the expected value/mean of training tte.
            Because we're lazy we want the correct scale of output built
            into the model so initialize implicitly:
            multiply assumed exp(0)=1 by scale factor `init_alpha`.

        - Beta:
            (activation)
            We want slow changes when beta -> 0 so Softplus made sense in the
            original paper but we get a similar effect with sigmoid. It also
            has nice features.
            (regularization) Use max_beta_value to implicitly regularize the model.
            (initialization) Fixed to begin moving slowly around 1.0.

        - Usage
            .. code-block:: python

                model.add(TimeDistributed(Dense(2)))
                model.add(Lambda(wtte.output_lambda,
                                 arguments={"init_alpha": init_alpha,
                                            "max_beta_value": 2.0}))

    :param x: tensor with last dimension having length 2
        with x[..., 0] = alpha, x[..., 1] = beta
    :param init_alpha: initial value of `alpha`. Default value is 1.0.
    :param max_beta_value: maximum beta value. Default value is 5.0.
    :param max_alpha_value: maximum alpha value. Default is `None`.
    :type x: Array
    :type init_alpha: Integer
    :type max_beta_value: Integer
    :type max_alpha_value: Integer
    :return x: A positive `Tensor` of same shape as input
    :rtype: Array
    """
    if max_beta_value is None or max_beta_value > 3:
        if K.epsilon() > 1e-07 and K.backend() == 'tensorflow':
            message = "Using tensorflow backend and allowing high " \
                      "`max_beta_value` may lead to gradient NaN during " \
                      "training unless `K.epsilon()` is small. Call " \
                      "`keras.backend.set_epsilon(1e-08)` to lower epsilon."
            warnings.warn(message)

    a, b = _keras_unstack_hack(x)

    # Implicitly initialize alpha:
    if alpha_kernel_scalefactor is None:
        a = init_alpha * K.exp(a)
    else:
        a = init_alpha * K.exp(alpha_kernel_scalefactor * a)

    m = max_beta_value
    if m > 1.05:  # some value >> 1.0
        # shift to start around 1.0
        # assuming input is around 0.0
        _shift = np.log(m - 1.0)
        b = K.sigmoid(b - _shift)
    else:
        b = K.sigmoid(b)

    # Clipped sigmoid: has zero gradient at 0, 1.
    # Reduces the small tendency of instability after long training
    # by zeroing gradient.
    b = m * K.clip(x=b, min_value=K.epsilon(), max_value=1. - K.epsilon())

    x = K.stack([a, b], axis=-1)
    return x
def simple_test(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2 images are (rows, cols, channels), i.e. (height, width, channels);
    # the original indexed these the other way around
    height = image.shape[0]
    width = image.shape[1]
    image = cv2.resize(image, (image_w, image_h))
    image = image.reshape((1, image_w, image_h, 3))

    prediction = model.predict(image, batch_size=1)
    print(prediction.shape)  # 1, 13, 13, 125

    # Reshape it to 1, 13, 13, 5, 25
    # 5 anchor boxes at every grid in 13 x 13
    # 25 elements for each anchor box:
    # probability that an object is present, bx, by, w, h,
    # and a 20-dim vector for the classes
    p_resh = prediction.reshape(1, 13, 13, 5, 25)
    print(p_resh.shape)

    for box_i in range(5):
        box = p_resh[0][0][0][box_i]
        pc = box[0]
        c_scores = box[5:]
        res = pc * c_scores
        idx = np.argmax(res)
        p = class_dict[idx]
        print("Box No {} score {} box {},{},{},{} class {} ".format(
            box_i, res[idx], box[1], box[2], box[3], box[4], p))

    box_confidence = p_resh[:, :, :, :, 0]
    box_confidence = box_confidence.reshape(1, 13, 13, 5, 1)
    boxes = p_resh[:, :, :, :, 1:5]
    boxes = boxes.reshape(1, 13, 13, 5, 4)
    box_class_prob = p_resh[:, :, :, :, 5:]
    box_class_prob = box_class_prob.reshape(1, 13, 13, 5, 20)

    # Filter the boxes
    threshold = 0.6
    box_scores = np.multiply(box_confidence, box_class_prob)
    print(box_scores.shape)
    box_class = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)

    # Filtering mask
    filtering_mask = K.greater_equal(box_class_scores, threshold)

    with K.get_session() as test:
        scores = tf.boolean_mask(box_class_scores, filtering_mask).eval()
        boxes = tf.boolean_mask(boxes, filtering_mask).eval()
        classes = tf.boolean_mask(box_class, filtering_mask).eval()
        print(boxes.shape)
        print(classes.shape)
        print(scores.shape)

        max_boxes = 5
        iou_threshold = 0.6
        # tensor to be used in tf.image.non_max_suppression()
        max_boxes_tensor = K.variable(max_boxes, dtype='int32')
        # initialize variable max_boxes_tensor
        test.run(tf.variables_initializer([max_boxes_tensor]))

        # Use tf.image.non_max_suppression() to get the list of indices
        # corresponding to boxes you keep
        nms_indices = tf.image.non_max_suppression(
            boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
        scores = K.gather(scores, nms_indices).eval()
        boxes = K.gather(boxes, nms_indices).eval()
        classes = K.gather(classes, nms_indices).eval()
        print(boxes.shape)
        print(classes.shape)
        print(scores.shape)

        # scale the boxes
        image_dims = K.stack([height, width, height, width])
        image_dims = K.reshape(image_dims, [1, 4])
        boxes = boxes * image_dims
        print(boxes.eval())
def call(self, inputs, training=None, mask=None):
    input_shape = K.shape(inputs)

    if self.rank == 1:
        input_shape = [input_shape[i] for i in range(3)]
        batch_shape, dim, channels = input_shape

        xx_range = K.tile(K.expand_dims(K.arange(0, dim), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=-1)

        xx_channels = K.cast(xx_range, K.floatx())
        xx_channels = xx_channels / K.cast(dim - 1, K.floatx())
        xx_channels = (xx_channels * 2) - 1.

        outputs = K.concatenate([inputs, xx_channels], axis=self.axis)

    if self.rank == 2:
        if self.data_format == 'channels_first':
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])

        input_shape = [input_shape[i] for i in range(4)]
        batch_shape, dim1, dim2, channels = input_shape

        xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)

        xx_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=1)

        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

        yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)

        yy_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                          K.stack([batch_shape, 1]))
        yy_range = K.expand_dims(yy_range, axis=-1)

        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

        xx_channels = K.cast(xx_channels, K.floatx())
        xx_channels = xx_channels / K.cast(dim1 - 1, K.floatx())
        xx_channels = (xx_channels * 2) - 1.

        yy_channels = K.cast(yy_channels, K.floatx())
        yy_channels = yy_channels / K.cast(dim2 - 1, K.floatx())
        yy_channels = (yy_channels * 2) - 1.

        outputs = K.concatenate([inputs, xx_channels, yy_channels],
                                axis=self.axis)

        if self.use_radius:
            rr = K.sqrt(K.square(xx_channels - 0.5) +
                        K.square(yy_channels - 0.5))
            outputs = K.concatenate([outputs, rr], axis=-1)

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, [0, 3, 1, 2])

    if self.rank == 3:
        if self.data_format == 'channels_first':
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1])

        input_shape = [input_shape[i] for i in range(5)]
        batch_shape, dim1, dim2, dim3, channels = input_shape

        xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)

        xx_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=1)

        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

        xx_channels = K.expand_dims(xx_channels, axis=1)
        xx_channels = K.tile(xx_channels, [1, dim1, 1, 1, 1])

        yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)

        yy_range = K.tile(K.expand_dims(K.arange(0, dim3), axis=0),
                          K.stack([batch_shape, 1]))
        yy_range = K.expand_dims(yy_range, axis=-1)

        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

        yy_channels = K.expand_dims(yy_channels, axis=1)
        yy_channels = K.tile(yy_channels, [1, dim1, 1, 1, 1])

        zz_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                          K.stack([batch_shape, 1]))
        zz_range = K.expand_dims(zz_range, axis=-1)
        zz_range = K.expand_dims(zz_range, axis=-1)

        zz_channels = K.tile(zz_range, [1, 1, dim2, dim3])
        zz_channels = K.expand_dims(zz_channels, axis=-1)

        xx_channels = K.cast(xx_channels, K.floatx())
        xx_channels = xx_channels / K.cast(dim2 - 1, K.floatx())
        xx_channels = xx_channels * 2 - 1.

        yy_channels = K.cast(yy_channels, K.floatx())
        yy_channels = yy_channels / K.cast(dim3 - 1, K.floatx())
        yy_channels = yy_channels * 2 - 1.

        zz_channels = K.cast(zz_channels, K.floatx())
        zz_channels = zz_channels / K.cast(dim1 - 1, K.floatx())
        zz_channels = zz_channels * 2 - 1.

        outputs = K.concatenate([inputs, zz_channels, xx_channels, yy_channels],
                                axis=self.axis)

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3])

    return outputs