def correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Get corrected boxes'''
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape
    box_yx = (box_yx - offset) * scale
    box_hw *= scale

    box_mins = box_yx - (box_hw / 2.)
    box_maxes = box_yx + (box_hw / 2.)
    boxes = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]   # x_max
    ])

    # Scale boxes back to original image shape.
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes
def _transform(theta, input, downsample_factor):
    num_batch, num_channels, height, width = input.shape
    theta = K.reshape(theta, (-1, 2, 3))

    # grid of (x_t, y_t, 1), eq (1) in ref [2]
    height_f = K.cast(height, 'float32')
    width_f = K.cast(width, 'float32')
    out_height = K.cast(height_f // downsample_factor, 'int64')
    out_width = K.cast(width_f // downsample_factor, 'int64')
    grid = _meshgrid(out_height, out_width)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = K.dot(theta, grid)
    x_s, y_s = T_g[:, 0], T_g[:, 1]
    x_s_flat = x_s.flatten()
    y_s_flat = y_s.flatten()

    # dimshuffle input to (bs, height, width, channels)
    # input_dim = input.dimshuffle(0, 2, 3, 1)
    input_dim = input.transpose(0, 2, 3, 1)
    input_transformed = _interpolate(
        input_dim, x_s_flat, y_s_flat, downsample_factor)

    output = K.reshape(input_transformed,
                       (num_batch, out_height, out_width, num_channels))
    output = output.transpose(0, 3, 1, 2)
    return output
def loss(self, y_true, y_pred):
    """ categorical crossentropy loss """

    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)

    if self.use_float16:
        y_true = K.cast(y_true, 'float16')
        y_pred = K.cast(y_pred, 'float16')

    # scale and clip probabilities
    # this should not be necessary for softmax output.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1)

    # compute log probability
    log_post = K.log(y_pred)  # likelihood

    # loss
    loss = - y_true * log_post

    # weighted loss
    if self.weights is not None:
        loss *= self.weights

    if self.vox_weights is not None:
        loss *= self.vox_weights

    # take the total loss
    # loss = K.batch_flatten(loss)
    mloss = K.mean(K.sum(K.cast(loss, 'float32'), -1))
    tf.verify_tensor_all_finite(mloss, 'Loss not finite')
    return mloss
def call(self, x, mask=None):
    eij = dot_product(x, self.W)

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)

    a = K.exp(eij)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases, especially in the early stages of training, the sum may be
    # almost zero and this results in NaN's. A workaround is to add a very
    # small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)

    if self.return_attention:
        return [result, a]
    return result
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x, mask=None):
    selection = K.cast(x < self.threshold, 'float32')
    selection = gaussian_filter_2d(selection, sigma=self.sigma)
    selection = K.cast(selection > self.smooth_threshold, 'float32')
    selection = gaussian_filter_2d(selection, sigma=2/3)
    return selection
def call(self, x):
    r = K.cast(K.arange(self.num), K.floatx()) / float(self.num - 1)
    r = self.start + (self.stop - self.start) * r
    r = K.expand_dims(K.expand_dims(r), axis=0)
    r = K.cast(r, dtype=K.floatx())
    r = K.tile(r, (K.shape(x)[0], 1, 1))
    return r
def call(self, x, mask=None):
    input_shape = K.shape(x)

    if self.dim_ordering == 'th':
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        num_rows = input_shape[1]
        num_cols = input_shape[2]

    row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
    col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

    outputs = []

    if self.dim_ordering == 'th':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]
                    x_crop = x[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)

    elif self.dim_ordering == 'tf':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]
                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(1, 2))
                    outputs.append(pooled_val)

    outputs = K.concatenate(outputs)
    return outputs
def get_split_averages(input_tensor, input_mask, indices):
    # Splits input tensor into three parts based on the indices and
    # returns average of values prior to index, values at the index and
    # average of values after the index.
    # input_tensor: (batch_size, input_length, input_dim)
    # input_mask: (batch_size, input_length)
    # indices: (batch_size, 1)

    # (1, input_length)
    length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
    # (batch_size, input_length)
    batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
    # (batch_size, input_length)
    tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)
    # (batch_size, input_length)
    greater_mask = K.greater(batched_range, tiled_indices)
    # (batch_size, input_length)
    lesser_mask = K.lesser(batched_range, tiled_indices)
    # (batch_size, input_length)
    equal_mask = K.equal(batched_range, tiled_indices)

    # We also need to mask these masks using the input mask.
    # (batch_size, input_length)
    if input_mask is not None:
        greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
        lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))

    # (batch_size, input_dim)
    post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)
    # (batch_size, input_dim)
    pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)
    # (batch_size, input_dim)
    values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)

    # (batch_size, 1)
    post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)
    # (batch_size, 1)
    pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)

    return (K.cast(pre_sum / pre_normalizer, 'float32'),
            values_at_indices,
            K.cast(post_sum / post_normalizer, 'float32'))
def call(self, x, mask=None):
    # eij = K.dot(x, self.W)  # the TF backend doesn't support dot on >2D tensors
    # features_dim = self.W.shape[0]
    # step_dim = x._keras_shape[1]
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                          K.reshape(self.W, (features_dim, 1))),
                    (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)

    a = K.exp(eij)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases, especially in the early stages of training, the sum may
    # be almost zero
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    # print(weighted_input.shape)
    return K.sum(weighted_input, axis=1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)

    return loss / K.cast(m, K.dtype(loss))
def _get_anchor_positive_triplet_mask(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
    # mask label(a) != label(p)
    mask1 = K.equal(K.expand_dims(y_true, 0), K.expand_dims(y_true, 1))
    mask1 = K.cast(mask1, K.dtype(pairwise_dist))
    # mask a == p
    mask2 = K.not_equal(pairwise_dist, 0.0)
    mask2 = K.cast(mask2, K.dtype(pairwise_dist))
    return mask1 * mask2
def _linspace(start, stop, num):
    # produces results identical to:
    # np.linspace(start, stop, num)
    start = K.cast(start, 'float32')
    stop = K.cast(stop, 'float32')
    num = K.cast(num, 'float32')
    step = (stop - start) / (num - 1)
    return T.arange(num, dtype='float32') * step + start
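Since `_linspace` mirrors `np.linspace`, the formula can be sanity-checked with NumPy alone, without a Theano graph; the values below are purely illustrative:

import numpy as np

# NumPy-only check of the formula above: arange(num) * step + start
# reproduces np.linspace(start, stop, num).
start, stop, num = 2.0, 5.0, 7
step = (stop - start) / (num - 1)
manual = np.arange(num, dtype='float32') * step + start
assert np.allclose(manual, np.linspace(start, stop, num, dtype='float32'))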
def test_sparse_categorical_accuracy_correctness():
    y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx())
    y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx())
    # use one_hot embedding to convert sparse labels to equivalent dense labels
    y_a_dense_labels = K.cast(K.one_hot(K.cast(y_a, dtype='int32'), num_classes=7),
                              dtype=K.floatx())
    sparse_categorical_acc = metrics.sparse_categorical_accuracy(y_a, y_b)
    categorical_acc = metrics.categorical_accuracy(y_a_dense_labels, y_b)
    assert np.allclose(K.eval(sparse_categorical_acc), K.eval(categorical_acc))
def rec_S(y_true, y_pred):
    s_flow = K.variable(np.array([1, 0]))
    p = K.cast(K.equal(K.argmax(s_flow, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    n = K.cast(K.not_equal(K.argmax(s_flow, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    t = K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    f = K.cast(K.not_equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    tp = t * p
    fn = f * n
    return K.sum(tp) / (K.sum(tp) + K.sum(fn))
def rec_L(y_true, y_pred):
    s_flow = K.variable(np.array([1, 0]))
    p = K.cast(K.equal(K.argmax(s_flow, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    n = K.cast(K.not_equal(K.argmax(s_flow, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    t = K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    f = K.cast(K.not_equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx())
    tn = t * n
    fp = f * p
    return K.sum(tn) / (K.sum(tn) + K.sum(fp))
def _get_semihard_anchor_negative_triplet_mask(self, negative_dist: Tensor,
                                               hardest_positive_dist: Tensor,
                                               mask_negative: Tensor) -> Tensor:
    # mask max(dist(a,p)) < dist(a,n)
    mask = K.greater(negative_dist, hardest_positive_dist)
    mask = K.cast(mask, K.dtype(negative_dist))
    mask_semihard = K.cast(K.expand_dims(K.greater(K.sum(mask, 1), 0.0), 1),
                           K.dtype(negative_dist))
    mask = mask_negative * (1 - mask_semihard) + mask * mask_semihard
    return mask
def fn(y_true, y_pred):
    class_id_true = K.argmax(y_true, axis=-1)
    class_id_preds = K.argmax(y_pred, axis=-1)
    # Replace class_id_preds with class_id_true for recall here
    accuracy_mask = K.cast(K.equal(class_id_true, interesting_class_id), 'int32')
    class_acc_tensor = K.cast(K.equal(class_id_true, class_id_preds), 'int32') * accuracy_mask
    class_acc = K.sum(class_acc_tensor) / K.maximum(K.sum(accuracy_mask), 1)
    return class_acc
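The snippet above is the inner closure of a per-class accuracy metric; `interesting_class_id` is captured from the enclosing scope. A plausible wrapper (the name `single_class_accuracy` is an assumption, not from the source) plus a quick numeric check:

import numpy as np
from keras import backend as K

def single_class_accuracy(interesting_class_id):
    # Build a metric like `fn` above, bound to one class id; pass the result
    # to model.compile(..., metrics=[single_class_accuracy(1)]).
    def fn(y_true, y_pred):
        class_id_true = K.argmax(y_true, axis=-1)
        class_id_preds = K.argmax(y_pred, axis=-1)
        accuracy_mask = K.cast(K.equal(class_id_true, interesting_class_id), 'int32')
        class_acc_tensor = K.cast(K.equal(class_id_true, class_id_preds), 'int32') * accuracy_mask
        return K.sum(class_acc_tensor) / K.maximum(K.sum(accuracy_mask), 1)
    return fn

# Two samples of class 1; only the first is predicted correctly -> 0.5.
y_true = K.constant([[0, 1, 0], [0, 1, 0], [1, 0, 0]])
y_pred = K.constant([[0.1, 0.8, 0.1], [0.7, 0.2, 0.1], [0.9, 0.05, 0.05]])
print(K.eval(single_class_accuracy(1)(y_true, y_pred)))  # ~0.5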
def call(self, x, mask=None):
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, axis=-1)
        s = K.sum(mask, axis=1)
        # Note: the original checked `K.equal(s, K.zeros_like(s)) is None`,
        # which is always False for a symbolic tensor. Guard against an
        # all-zero mask with an epsilon in the denominator instead.
        return K.cast(K.sum(x * mask, axis=1) / K.sqrt(s + K.epsilon()), K.floatx())
    else:
        # `len(x)` is undefined for symbolic tensors; use the timestep count.
        n = K.cast(K.shape(x)[1], K.floatx())
        return K.sum(x, axis=1) / K.sqrt(n)
def call(self, x, mask=None):
    num = self.input_length
    if mask is not None:  # `if mask:` is invalid for symbolic tensors
        num = K.cast(K.sum(mask, -1, keepdims=True), K.floatx())
        mask = K.expand_dims(mask, -1)
        _x = x * K.cast(mask, K.floatx())
    else:
        _x = x
    if not self.ave:
        num = 1
    return K.sum(_x, 1) / num
def call(self, x, mask=None):
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, axis=-1)
        s = K.sum(mask, axis=1)
        # As above, a Python `if` cannot branch on a symbolic tensor; the
        # all-zero-mask case is handled by the small constant in the denominator.
        return K.cast(K.sum(x * mask, axis=1) /
                      (K.sqrt(s) + K.constant(1e-10, dtype=K.floatx())),
                      K.floatx())
    else:
        return K.mean(x, axis=1)
def softmax(x, axis, mask=None):
    if mask is None:
        mask = K.constant(True)
    mask = K.cast(mask, K.floatx())
    if K.ndim(x) == K.ndim(mask) + 1:  # use ==, not `is`, to compare ints
        mask = K.expand_dims(mask)
    m = K.max(x, axis=axis, keepdims=True)
    e = K.exp(x - m) * mask
    s = K.sum(e, axis=axis, keepdims=True)
    s += K.cast(K.cast(s < K.epsilon(), K.floatx()) * K.epsilon(), K.floatx())
    return e / s
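A short illustration of the masked softmax above (assuming the function is in scope): positions where the mask is zero get zero probability and the remaining positions renormalize among themselves.

import numpy as np
from keras import backend as K

x = K.constant([[1.0, 2.0, 3.0]])
mask = K.constant([[1.0, 1.0, 0.0]])
# exp(x - max) is zeroed at the masked position before normalizing.
print(K.eval(softmax(x, axis=-1, mask=mask)))  # approx. [[0.269, 0.731, 0.0]]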
def call(self, inputs, mask=None):
    def norm_scale(a):
        return (a + 1) / 2

    x, scale_black, scale_white, shift = inputs
    scale_black = norm_scale(scale_black)
    black_selection = K.cast(x < 0.5, K.floatx())
    white_selection = K.cast(x >= 0.5, K.floatx())

    black_scaled = x * black_selection * (1 - scale_black * self.scale_factor)
    white_scaled = x * white_selection * (1 - scale_white * self.scale_factor)
    scaled = black_scaled + white_scaled
    return scaled + self.shift_factor * shift
def call(self, x, mask=None):
    e = K.dot(x, self.W)
    if self.bias:
        e += self.b
    e = K.tanh(e)
    e = K.reshape(K.dot(e, self.U), (-1, self.timesteps))
    a = K.exp(e)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a_weights = a / K.cast(K.sum(a, axis=-1, keepdims=True) + K.epsilon(), K.floatx())
    weighted_output = x * K.expand_dims(a_weights, axis=-1)
    return [K.mean(weighted_output, axis=1), a_weights]
def get_w(self, x, mask=None):
    input_shape = K.int_shape(x)
    features_dim = self.features_dim
    step_dim = input_shape[1]

    eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                          K.reshape(self.W, (features_dim, 1))),
                    (-1, step_dim))

    if self.bias:
        eij += self.b[:input_shape[1]]

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    return a
def loss(y_true, y_pred):
    from plasma.conf import conf
    fac = MaxHingeTarget.fac
    # overall_fac = np.prod(np.array(K.shape(y_pred)[1:]).astype(np.float32))
    overall_fac = K.prod(K.cast(K.shape(y_pred)[1:], K.floatx()))
    max_val = K.max(y_pred, axis=-2)  # temporal axis!
    max_val1 = K.repeat(max_val, K.shape(y_pred)[-2])
    mask = K.cast(K.equal(max_val1, y_pred), K.floatx())
    y_pred1 = mask * y_pred + (1 - mask) * y_true
    weight_mask = K.mean(y_true, axis=-1)
    weight_mask = K.cast(K.greater(weight_mask, 0.0), K.floatx())  # positive label!
    weight_mask = fac * weight_mask + (1 - weight_mask)
    # return weight_mask * squared_hinge(y_true, y_pred1)
    return conf['model']['loss_scale_factor'] * overall_fac * weight_mask * hinge(y_true, y_pred1)
def weighted_bce_dice_loss(y_true, y_pred):
    y_true = K.cast(y_true, 'float32')
    y_pred = K.cast(y_pred, 'float32')
    # if we want to get same size of output, kernel size must be odd number
    averaged_mask = K.pool2d(
        y_true, pool_size=(11, 11), strides=(1, 1), padding='same', pool_mode='avg')
    border = K.cast(K.greater(averaged_mask, 0.005), 'float32') * \
        K.cast(K.less(averaged_mask, 0.995), 'float32')
    weight = K.ones_like(averaged_mask)
    w0 = K.sum(weight)
    weight += border * 2
    w1 = K.sum(weight)
    weight *= (w0 / w1)
    loss = weighted_bce_loss(y_true, y_pred, weight) + \
        weighted_dice_loss(y_true, y_pred, weight)
    return loss
def call(self, inputs, mask=None):
    if not isinstance(inputs, list) or len(inputs) <= 1:
        raise TypeError('SpkLifeLongMemory must be called on a list of tensors '
                        '(at least 2). Got: ' + str(inputs))
    # (None(batch), 1), index of speaker
    target_spk_l = inputs[0]
    target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], ))
    if K.dtype(target_spk_l) != 'int32':
        target_spk_l = K.cast(target_spk_l, 'int32')
    # (None(batch), embed_dim)
    spk_vector_l = inputs[1]

    # Start to update life-long memory based on the learned speech vector.
    # First do normalization.
    spk_vector_eps = K.switch(K.equal(spk_vector_l, 0.), np.spacing(1), spk_vector_l)  # avoid zero
    spk_vector_eps = K.sqrt(K.sum(spk_vector_eps**2, axis=1))
    spk_vector_eps = spk_vector_eps.dimshuffle((0, 'x'))
    spk_vector = T.true_div(spk_vector_l,
                            K.repeat_elements(spk_vector_eps, self.vec_dim, axis=1))

    # Store speech vector into life-long memory according to the speaker identity.
    life_long_mem = T.inc_subtensor(self.life_long_mem[target_spk_l, :], spk_vector)

    # Normalization for memory.
    life_long_mem_eps = K.switch(K.equal(life_long_mem, 0.), np.spacing(1), life_long_mem)  # avoid 0
    life_long_mem_eps = K.sqrt(K.sum(life_long_mem_eps**2, axis=1))
    life_long_mem_eps = life_long_mem_eps.dimshuffle((0, 'x'))
    life_long_mem = T.true_div(life_long_mem,
                               K.repeat_elements(life_long_mem_eps, self.vec_dim, axis=1))

    # (None(batch), spk_size, embed_dim)
    return life_long_mem
def call(self, x, mask=None):
    if hasattr(x, '_keras_shape'):
        input_shape = x._keras_shape
    else:
        input_shape = self._input_shape

    # Collapse all leading dimensions: (batch * d1 * ... * dn-2, dn-1, dn)
    x = K.reshape(x, (-1,) + input_shape[-2:])
    if mask is not None:
        mask_shape = (K.shape(x)[0], -1)
        mask = K.reshape(mask, mask_shape)  # give it the same first dim
    y = self.layer.call(x, mask)
    output_shape = self.get_output_shape_for(input_shape)
    return K.cast(K.reshape(y, output_shape), K.floatx())
def set_batch_function(self, model, input_shape, batch_size, nb_actions, gamma):
    input_dim = np.prod(input_shape)
    samples = K.placeholder(shape=(batch_size, input_dim * 2 + 3))
    S = samples[:, 0:input_dim]
    a = samples[:, input_dim]
    # Action indices must be integers; the original `K.cast(a, '')` passed an
    # invalid empty dtype string.
    a = K.cast(a, 'int32')
    r = samples[:, input_dim + 1]
    S_prime = samples[:, input_dim + 2:2 * input_dim + 2]
    game_over = samples[:, 2 * input_dim + 2:2 * input_dim + 3]
    r = K.reshape(r, (batch_size, 1))
    r = K.repeat(r, nb_actions)
    r = K.reshape(r, (batch_size, nb_actions))
    game_over = K.repeat(game_over, nb_actions)
    game_over = K.reshape(game_over, (batch_size, nb_actions))
    S = K.reshape(S, (batch_size, ) + input_shape)
    S_prime = K.reshape(S_prime, (batch_size, ) + input_shape)
    X = K.concatenate([S, S_prime], axis=0)
    Y = model(X)
    Qsa = K.max(Y[batch_size:], axis=1)
    Qsa = K.reshape(Qsa, (batch_size, 1))
    Qsa = K.repeat(Qsa, nb_actions)
    Qsa = K.reshape(Qsa, (batch_size, nb_actions))
    delta = K.reshape(self.one_hot(a, nb_actions), (batch_size, nb_actions))
    targets = (1 - delta) * Y[:batch_size] + delta * (r + gamma * (1 - game_over) * Qsa)
    self.batch_function = K.function(inputs=[samples], outputs=[S, targets])
def call(self, inputs, mask=None):
    return inputs * K.cast(K.greater(inputs, self.theta), K.floatx())
def kde_condentropy(output, var):
    # Return entropy of a multivariate Gaussian, in nats
    dims = K.cast(K.shape(output)[1], K.floatx())
    normconst = (dims / 2.0) * K.log(2 * np.pi * var)
    return normconst
def call(self, x, mask=None):
    return k.cast(super(SpikeFlatten, self).call(x), k.floatx())
def accuracy(self, y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.'''
    return K.mean(
        K.equal(y_true, K.cast(y_pred < self.threshold, y_true.dtype)))
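A minimal sketch of how this metric behaves outside its class, assuming label 1 marks a matching pair and `y_pred` holds distances (the threshold value is chosen for illustration):

import numpy as np
from keras import backend as K

threshold = 0.5
y_true = K.constant([1., 0., 1., 0.])      # 1 = same pair, 0 = different
y_pred = K.constant([0.2, 0.9, 0.7, 0.1])  # predicted distances
# A distance below the threshold is predicted as "same" (label 1).
acc = K.mean(K.equal(y_true, K.cast(y_pred < threshold, y_true.dtype)))
print(K.eval(acc))  # 0.5: only the first two pairs are classified correctly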
def accuracy(y_true, y_pred):
    y_true = K.cast(y_true, y_pred.dtype)
    y_true = K.argmax(y_true)
    # y_pred1 = K.argmax(y_pred)
    res = K.in_top_k(y_pred, y_true, k_best)
    return res
def reverse_sequence(self, x, mask):
    """Here mask.shape is [batch_size, seq_len, 1].
    """
    seq_len = K.round(K.sum(mask, 1)[:, 0])
    seq_len = K.cast(seq_len, 'int32')
    return tf.reverse_sequence(x, seq_len, seq_dim=1)
def _compute_eta_t(cls):
    PI = 3.141592653589793
    t_frac = K.cast(cls.t_cur / cls.total_iterations, 'float32')
    eta_t = cls.eta_min + 0.5 * (cls.eta_max - cls.eta_min) * \
        (1 + K.cos(PI * t_frac))
    return eta_t
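The schedule above is plain cosine annealing; a NumPy-only sketch of the same formula shows the endpoints and midpoint (the chosen constants are illustrative):

import numpy as np

eta_min, eta_max, total_iterations = 0.0, 1.0, 100
for t_cur in (0, 50, 100):
    t_frac = t_cur / total_iterations
    eta_t = eta_min + 0.5 * (eta_max - eta_min) * (1 + np.cos(np.pi * t_frac))
    print(t_cur, round(eta_t, 3))  # 1.0 at the start, 0.5 halfway, 0.0 at the end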
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.
    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.
    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If True then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, inputs, **kwargs):
    return K.cast(K.argmin(inputs, axis=1), dtype=K.floatx())
dream = model.input
print('Model loaded.')

# Get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# Define the loss.
loss = K.variable(0.)
for layer_name in settings['features']:
    # Add the L2 norm of the features of a layer to the loss.
    assert layer_name in layer_dict.keys(), \
        'Layer ' + layer_name + ' not found in model.'
    coeff = settings['features'][layer_name]
    x = layer_dict[layer_name].output
    # We avoid border artifacts by only involving non-border pixels in the loss.
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    if K.image_data_format() == 'channels_first':
        loss += coeff * K.sum(K.square(x[:, :, 2:-2, 2:-2])) / scaling
    else:
        loss += coeff * K.sum(K.square(x[:, 2:-2, 2:-2, :])) / scaling

# Compute the gradients of the dream wrt the loss.
grads = K.gradients(loss, dream)[0]
# Normalize gradients.
grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

# Set up function to retrieve the value
# of the loss and gradients given an input image.
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)
def build(self, mode, subnet, config):
    assert mode in ["training", "inference"]

    input_image = KL.Input(shape=[64, 64, 3], dtype=tf.float32)
    input_bboxes = KL.Input(shape=[None, 4], dtype=tf.float32)
    input_class_ids = KL.Input(shape=[None], dtype=tf.int32)
    input_active_ids = KL.Input(shape=[4, ], dtype=tf.int32)
    input_rpn_match = KL.Input(shape=[None, 1], dtype=tf.int32)
    input_rpn_bbox = KL.Input(shape=[None, 4], dtype=tf.float32)

    h, w = config.image_size[:2]
    image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32)
    gt_bboxes = KL.Lambda(lambda x: x / image_scale)(input_bboxes)

    feature_map = resNet_featureExtractor(input_image)
    rpn_class, rpn_prob, rpn_bbox = rpn_net(feature_map, 9)

    anchors = utils.anchor_gen(featureMap_size=[8, 8], ratios=config.ratios,
                               scales=config.scales, rpn_stride=config.rpn_stride,
                               anchor_stride=config.anchor_stride)

    proposals = proposal_func.proposal(proposal_count=16, nms_thresh=0.7,
                                       anchors=anchors, batch_size=20,
                                       config=config)([rpn_prob, rpn_bbox])

    if mode == "training":
        target_rois, target_class_ids, target_delta, target_bboxes = \
            detection_target_fixed.DetectionTarget(
                config=config,
                name="proposal_target")([proposals, input_class_ids, gt_bboxes])

        denomrlaize_rois = KL.Lambda(lambda x: 8.0 * x, name="denormalized_rois")(target_rois)

        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(
            feature_map, denomrlaize_rois, 20, 21, 7, 4)

        loss_rpn_match = KL.Lambda(lambda x: rpn_class_loss(*x), name="loss_rpn_match")(
            [input_rpn_match, rpn_class])
        loss_rpn_bbox = KL.Lambda(lambda x: rpn_bbox_loss(*x), name="loss_rpn_bbox")(
            [input_rpn_bbox, input_rpn_match, rpn_bbox])

        bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="bbox_loss")(
            [target_delta, target_class_ids, mrcnn_bbox])
        class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graphV2(*x), name="mrcnn_class_loss")(
            [target_class_ids, mrcnn_class_logits, input_active_ids])

        if subnet == "rpn":
            model = Model(
                [input_image, input_bboxes, input_class_ids, input_active_ids,
                 input_rpn_match, input_rpn_bbox],
                [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals,
                 target_rois, denomrlaize_rois, target_class_ids, target_delta,
                 target_bboxes, loss_rpn_match, loss_rpn_bbox])
        elif subnet == "all":
            model = Model(
                [input_image, input_bboxes, input_class_ids, input_active_ids,
                 input_rpn_match, input_rpn_bbox],
                [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals,
                 target_rois, denomrlaize_rois, target_class_ids, target_delta,
                 target_bboxes, mrcnn_class_logits, mrcnn_class, mrcnn_bbox,
                 loss_rpn_match, loss_rpn_bbox, bbox_loss, class_loss])

    if mode == "inference":
        denomrlaize_proposals = KL.Lambda(lambda x: 8.0 * x, name="denormalized_proposals")(proposals)
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(
            feature_map, denomrlaize_proposals, 20, 16, 7, 4)
        detections = DetectionLayer()([proposals, mrcnn_class, mrcnn_bbox])
        model = Model([input_image], [detections])

    return model
def binary_PFA(y_true, y_pred, threshold=K.variable(value=0.5)):
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negatives; FP = alerts raised on negative labels.
    N = K.sum(1 - y_true)
    FP = K.sum(y_pred - y_pred * y_true)
    return FP / N
def castB(x):
    return K.cast(x, bool)
def build_model():
    x_in = Input(shape=(None, ))
    yl_in = Input(shape=(None, ))
    yr_in = Input(shape=(None, ))
    x, yl, yr = x_in, yl_in, yr_in

    x_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
    y_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(yl)

    x_one_hot = Lambda(to_one_hot)([x, x_mask])
    # Learn a prior over the output distribution (title tokens are likely
    # to have appeared in the article).
    x_prior = ScaleShift()(x_one_hot)

    embedding = Embedding(len(chars) + 4, char_size)
    x = embedding(x)

    # Encoder: two stacked bidirectional LSTMs.
    x = LayerNormalization()(x)
    x = OurBidirectional(CuDNNLSTM(z_dim // 2, return_sequences=True))([x, x_mask])
    x = LayerNormalization()(x)
    x = OurBidirectional(CuDNNLSTM(z_dim // 2, return_sequences=True))([x, x_mask])
    x_max = Lambda(seq_maxpool)([x, x_mask])

    # Forward decoder: unidirectional LSTM.
    y = embedding(yl)
    y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])
    y = CuDNNLSTM(z_dim, return_sequences=True)(y)
    y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])
    y = CuDNNLSTM(z_dim, return_sequences=True)(y)
    yl = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])

    # Backward decoder: unidirectional LSTM.
    y = embedding(yr)
    y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])
    y = CuDNNLSTM(z_dim, return_sequences=True)(y)
    y = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])
    y = CuDNNLSTM(z_dim, return_sequences=True)(y)
    yr = SelfModulatedLayerNormalization(z_dim // 4)([y, x_max])

    # Alignment attention plus retrieval attention.
    yl_ = Attention(8, 16, mask_right=True)([yl, yr, yr])
    ylx = Attention(8, 16)([yl, x, x, x_mask])
    yl = Concatenate()([yl, yl_, ylx])

    # Alignment attention plus retrieval attention.
    yr_ = Attention(8, 16, mask_right=True)([yr, yl, yl])
    yrx = Attention(8, 16)([yr, x, x, x_mask])
    yr = Concatenate()([yr, yr_, yrx])

    # Final output classifier (weights shared between both directions).
    classifier = Dense(len(chars) + 4)

    yl = Dense(char_size)(yl)
    yl = LeakyReLU(0.2)(yl)
    yl = classifier(yl)
    yl = Lambda(lambda x: (x[0] + x[1]) / 2)([yl, x_prior])  # average with the prior
    yl = Activation('softmax')(yl)

    yr = Dense(char_size)(yr)
    yr = LeakyReLU(0.2)(yr)
    yr = classifier(yr)
    yr = Lambda(lambda x: (x[0] + x[1]) / 2)([yr, x_prior])  # average with the prior
    yr = Activation('softmax')(yr)

    # Cross entropy as the loss, with the padding positions masked out.
    cross_entropy_1 = K.sparse_categorical_crossentropy(yl_in[:, 1:], yl[:, :-1])
    cross_entropy_1 = K.sum(cross_entropy_1 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
    cross_entropy_2 = K.sparse_categorical_crossentropy(yr_in[:, 1:], yr[:, :-1])
    cross_entropy_2 = K.sum(cross_entropy_2 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
    cross_entropy = (cross_entropy_1 + cross_entropy_2) / 2

    model = Model([x_in, yl_in, yr_in], [yl, yr])
    model.add_loss(cross_entropy)
    model.compile(optimizer=Adam(1e-3))
    return model
def weighted_accuracy(y_true, y_pred):
    weights = y_true * weight_of_ones
    weighted_equal = K.cast(K.equal(y_true, K.round(y_pred)), K.floatx()) * weights
    return K.mean(weighted_equal)
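`weight_of_ones` is captured from the enclosing scope; a self-contained sketch with an explicit value (chosen purely for illustration) shows that only correctly predicted positives contribute, since zero labels get zero weight:

import numpy as np
from keras import backend as K

weight_of_ones = 2.0  # assumed value; the source captures this from outer scope

def weighted_accuracy(y_true, y_pred):
    weights = y_true * weight_of_ones
    weighted_equal = K.cast(K.equal(y_true, K.round(y_pred)), K.floatx()) * weights
    return K.mean(weighted_equal)

y_true = K.constant([1., 1., 0., 0.])
y_pred = K.constant([0.9, 0.2, 0.1, 0.8])
print(K.eval(weighted_accuracy(y_true, y_pred)))  # (2 + 0 + 0 + 0) / 4 = 0.5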
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1

    # Applies bounds on actual learning rate
    step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))

    final_lr = self.final_lr * lr / self.base_lr
    lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
    upper_bound = final_lr * (1. + 1. / (self.gamma * t))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsbound:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # apply weight decay
        if self.weight_decay != 0.:
            g += self.weight_decay * K.stop_gradient(p)

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        if self.amsbound:
            vhat_t = K.maximum(vhat, v_t)
            denom = (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            denom = (K.sqrt(v_t) + self.epsilon)

        # Compute the bounds
        step_size_p = step_size * K.ones_like(denom)
        step_size_p_bound = step_size_p / denom
        # TODO: Replace with K.clip after release of Keras > 2.2.4
        bounded_lr_t = m_t * tf.clip_by_value(step_size_p_bound,
                                              lower_bound,
                                              upper_bound)

        p_t = p - bounded_lr_t

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def _interpolate(self, image, sampled_grids, output_size):
    batch_size = K.shape(image)[0]
    height = K.shape(image)[1]
    width = K.shape(image)[2]
    num_channels = K.shape(image)[3]

    x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
    y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

    x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
    y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

    x0 = K.cast(x, 'int32')
    x1 = x0 + 1
    y0 = K.cast(y, 'int32')
    y1 = y0 + 1

    max_x = int(K.int_shape(image)[2] - 1)
    max_y = int(K.int_shape(image)[1] - 1)

    x0 = K.clip(x0, 0, max_x)
    x1 = K.clip(x1, 0, max_x)
    y0 = K.clip(y0, 0, max_y)
    y1 = K.clip(y1, 0, max_y)

    pixels_batch = K.arange(0, batch_size) * (height * width)
    pixels_batch = K.expand_dims(pixels_batch, axis=-1)
    flat_output_size = output_size[0] * output_size[1]
    base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
    base = K.flatten(base)

    # base_y0 = base + (y0 * width)
    base_y0 = y0 * width
    base_y0 = base + base_y0
    # base_y1 = base + (y1 * width)
    base_y1 = y1 * width
    base_y1 = base_y1 + base

    indices_a = base_y0 + x0
    indices_b = base_y1 + x0
    indices_c = base_y0 + x1
    indices_d = base_y1 + x1

    flat_image = K.reshape(image, shape=(-1, num_channels))
    flat_image = K.cast(flat_image, dtype='float32')
    pixel_values_a = K.gather(flat_image, indices_a)
    pixel_values_b = K.gather(flat_image, indices_b)
    pixel_values_c = K.gather(flat_image, indices_c)
    pixel_values_d = K.gather(flat_image, indices_d)

    x0 = K.cast(x0, 'float32')
    x1 = K.cast(x1, 'float32')
    y0 = K.cast(y0, 'float32')
    y1 = K.cast(y1, 'float32')

    area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
    area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
    area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
    area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

    values_a = area_a * pixel_values_a
    values_b = area_b * pixel_values_b
    values_c = area_c * pixel_values_c
    values_d = area_d * pixel_values_d
    return values_a + values_b + values_c + values_d
def accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
def softmax_activation(self, mem):
    """Softmax activation."""
    return k.cast(k.less_equal(k.random_uniform(k.shape(mem)),
                               k.softmax(mem)), k.floatx())
def neighbor(y_true, y_pred, n=2):
    ##### NOT WORKING #####
    '''Trying to do what neighbor_accuracy does later'''
    # Note: the original called K.lesser_equal, which does not exist in
    # Keras 2; K.less_equal is the current name.
    return K.cast(
        K.less_equal(
            K.abs(K.argmax(y_pred, axis=-1) - K.argmax(y_true, axis=-1)), n),
        K.floatx())
def linear_activation(self, mem):
    """Linear activation."""
    return k.cast(k.greater_equal(mem, self.v_thresh), k.floatx())
def to_floatx(x):
    return K.cast(x, K.floatx())
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
def call(self, x, mask=None):
    input_shape = K.shape(x)

    if self.dim_ordering == 'th':
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        num_rows = input_shape[1]
        num_cols = input_shape[2]

    row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
    col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

    outputs = []

    if self.dim_ordering == 'th':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]

                    x_crop = x[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)

    elif self.dim_ordering == 'tf':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], y2 - y1,
                                 x2 - x1, input_shape[3]]

                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(1, 2))
                    outputs.append(pooled_val)

    # Both orderings concatenate the pooled values the same way, so the
    # original per-ordering branches (and dead commented-out reshapes)
    # collapse to a single call.
    outputs = K.concatenate(outputs)
    return outputs
def chain_crf_loss(y, x, U, b_start=None, b_end=None, mask=None):
    '''Variant of sparse_chain_crf_loss but with one-hot encoded tags y.'''
    y_sparse = K.argmax(y, -1)
    y_sparse = K.cast(y_sparse, 'int32')
    return sparse_chain_crf_loss(y_sparse, x, U, b_start, b_end, mask)
def binary_PTA(y_true, y_pred, threshold=K.variable(value=0.5)):
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positives; TP = alerts raised on positive labels.
    P = K.sum(y_true)
    TP = K.sum(y_pred * y_true)
    return TP / P
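`binary_PFA` (earlier) and `binary_PTA` each give one ROC point per threshold, and a common pattern combines them into an approximate AUC metric by sweeping thresholds. A sketch under that assumption (the 100-point resolution and the `auc` name are choices for illustration, not from the source):

from keras import backend as K

def auc(y_true, y_pred, n_thresholds=100):
    # True/false positive rates at evenly spaced thresholds in [0, 1).
    ptas = K.stack([binary_PTA(y_true, y_pred, K.variable(k / n_thresholds))
                    for k in range(n_thresholds)], axis=0)
    pfas = K.stack([binary_PFA(y_true, y_pred, K.variable(k / n_thresholds))
                    for k in range(n_thresholds)], axis=0)
    pfas = K.concatenate([K.ones((1,)), pfas], axis=0)
    bin_sizes = -(pfas[1:] - pfas[:-1])  # widths between consecutive FPRs
    return K.sum(ptas * bin_sizes)       # Riemann sum under the ROC curve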
def call(self, x, mask=None):
    assert(len(x) == 2)

    img = x[0]
    rois = x[1]

    input_shape = K.shape(img)

    outputs = []

    for roi_idx in range(self.num_rois):
        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]

        row_length = w / float(self.pool_size)
        col_length = h / float(self.pool_size)

        num_pool_regions = self.pool_size

        # NOTE: the RoiPooling implementation differs between theano and
        # tensorflow due to the lack of a resize op in theano. The theano
        # implementation is much less efficient and leads to long compile times.
        if self.dim_ordering == 'th':
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = x + ix * row_length
                    x2 = x1 + row_length
                    y1 = y + jy * col_length
                    y2 = y1 + col_length

                    x1 = K.cast(x1, 'int32')
                    x2 = K.cast(x2, 'int32')
                    y1 = K.cast(y1, 'int32')
                    y2 = K.cast(y2, 'int32')

                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]

                    x_crop = img[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)

        elif self.dim_ordering == 'tf':
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')

            rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :],
                                        (self.pool_size, self.pool_size))
            outputs.append(rs)

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(final_output,
                             (1, self.num_rois, self.pool_size, self.pool_size,
                              self.nb_channels))

    if self.dim_ordering == 'th':
        final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
    else:
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
def smooth_l1_loss(y_true, y_pred):
    diff = K.abs(y_true - y_pred)
    less_than_one = K.cast(K.less(diff, 1.0), "float32")
    # Quadratic for |diff| < 1, linear beyond (Huber loss with delta = 1).
    loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
    return loss
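A quick numeric check of the smooth-L1 formula above; the quadratic and linear pieces meet at loss 0.5 when |diff| = 1:

import numpy as np
from keras import backend as K

y_true = K.constant([0.0, 0.0, 0.0])
y_pred = K.constant([0.5, 1.0, 3.0])
# |diff| = 0.5 -> 0.5 * 0.25 = 0.125; |diff| = 1 -> 0.5; |diff| = 3 -> 2.5
print(K.eval(smooth_l1_loss(y_true, y_pred)))  # [0.125, 0.5, 2.5]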
def castF(x):
    return K.cast(x, K.floatx())