def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(
        x, (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
def call(self, x):
    # If only Q_seq, K_seq, V_seq are passed, no mask is applied.
    # If Q_seq, K_seq, V_seq, Q_len, V_len are all passed, the padded
    # positions are masked out.
    if len(x) == 3:
        Q_seq, K_seq, V_seq = x
        Q_len, V_len = None, None
    elif len(x) == 5:
        Q_seq, K_seq, V_seq, Q_len, V_len = x
    # Apply the linear transforms to Q, K and V
    Q_seq = K.dot(Q_seq, self.WQ)
    Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
    Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
    K_seq = K.dot(K_seq, self.WK)
    K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
    K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
    V_seq = K.dot(V_seq, self.WV)
    V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
    V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
    # Compute the dot products, then mask, then softmax
    A = K.batch_dot(Q_seq, K_seq, axes=[3, 3])
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = self.Mask(A, V_len, 'add')
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = K.softmax(A)
    # Compute the output and mask it
    O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
    O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
    O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
    O_seq = self.Mask(O_seq, Q_len, 'mul')
    return O_seq
def get_output(self, train=False):
    def format_shape(shape):
        if K._BACKEND == 'tensorflow':
            def trf(x):
                try:
                    return int(x)
                except TypeError:
                    return x
            return list(map(trf, shape))  # list() so the result stays sliceable under Python 3
        return shape

    X = self.get_input(train)
    in_shape = format_shape(K.shape(X))
    batch_flatten_len = K.prod(in_shape[:2])
    cast_in_shape = (batch_flatten_len, ) + tuple(in_shape[i] for i in range(2, K.ndim(X)))
    pre_outs = self.layer(K.reshape(X, cast_in_shape))
    out_shape = format_shape(K.shape(pre_outs))
    cast_out_shape = (in_shape[0], in_shape[1]) + tuple(out_shape[i] for i in range(1, K.ndim(pre_outs)))
    outputs = K.reshape(pre_outs, cast_out_shape)
    return outputs
def __call__(self, loss):
    from . import patches
    output = self.layer.get_output(True)
    assert K.ndim(output) == 4
    batch_size = K.shape(output)[0] // 2
    patch_size = self.patch_size
    patch_stride = 1
    generated = output[:batch_size, :, :, :]
    content = output[batch_size:, :, :, :]
    # extract patches from feature maps
    generated_patches, generated_patches_norm = \
        patches.make_patches(generated, patch_size, patch_stride)
    content_patches, content_patches_norm = \
        patches.make_patches(content, patch_size, patch_stride)
    a_patches, a_patches_norm = \
        patches.make_patches(K.variable(self.features_a), patch_size, patch_stride)
    ap_patches, ap_patches_norm = \
        patches.make_patches(K.variable(self.features_ap), patch_size, patch_stride)
    # find best patches and calculate loss
    patch_ids = patches.find_patch_matches(
        content_patches, content_patches_norm,
        a_patches / a_patches_norm)
    best_analogy_patches = K.reshape(
        ap_patches[patch_ids], K.shape(generated_patches))
    loss += self.weight * K.sum(K.square(best_analogy_patches - generated_patches)) / patch_size ** 2
    return loss
def call(self, X): if type(X) is not list or len(X) != 2: raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X)) frame, position = X[0], X[1] # Reshaping the input to exclude the time dimension frameShape = K.shape(frame) positionShape = K.shape(position) (chans, height, width) = frameShape[-3:] targetDim = positionShape[-1] frame = K.reshape(frame, (-1, chans, height, width)) position = K.reshape(position, (-1, ) + (targetDim, )) # Applying the attention hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0 hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0 position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0) position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0) position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0) position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0) rX = Data.linspace(-1.0, 1.0, width) rY = Data.linspace(-1.0, 1.0, height) FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x')) FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x')) m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1) m = m + self.alpha - THT.gt(m, 0.) * self.alpha frame = frame * m.dimshuffle(0, 'x', 1, 2) # Reshaping the frame to include time dimension output = K.reshape(frame, frameShape) return output
def call(self, position):
    inputDim = K.ndim(position)
    positionShape = K.shape(position)
    targetDim = positionShape[-1]
    position = K.reshape(position, (-1, targetDim))
    samples = K.shape(position)[0]
    theta = THT.zeros((samples, 3, 3))

    chw = self.toChw(position)
    chw = K.reshape(chw, (samples, targetDim))
    dx = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
    dy = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
    cX = chw[:, 0] + dx
    cY = chw[:, 1] + dy
    h = K.maximum(chw[:, 2] * (1.0 + self.context), self.minSide)
    w = K.maximum(chw[:, 3] * (1.0 + self.context), self.minSide)

    # Calculating the parameters of the transformation
    tx = cX
    ty = cY
    sx = w / 2.0  # Scale x
    sy = h / 2.0  # Scale y

    # Setting transformation
    theta = THT.set_subtensor(theta[:, 0, 0], sx)
    theta = THT.set_subtensor(theta[:, 1, 1], sy)
    theta = THT.set_subtensor(theta[:, 0, 2], tx)
    theta = THT.set_subtensor(theta[:, 1, 2], ty)
    theta = THT.set_subtensor(theta[:, 2, 2], 1.0)

    thetaShape = K.concatenate([positionShape[:-1], K.shape(theta)[-2:]])
    theta = THT.reshape(theta, thetaShape, ndim=inputDim + 1)

    return theta
def call(self, inputs, **kwargs): """Following the routing algorithm from Hinton's paper, but replace b = b + <u,v> with b = <u,v>. This change can improve the feature representation of the capsule. However, you can replace b = K.batch_dot(outputs, hat_inputs, [2, 3]) with b += K.batch_dot(outputs, hat_inputs, [2, 3]) to get standard routing. """ if self.share_weights: hat_inputs = K.conv1d(inputs, self.kernel) else: hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1]) batch_size = K.shape(inputs)[0] input_num_capsule = K.shape(inputs)[1] hat_inputs = K.reshape(hat_inputs, (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule)) hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3)) b = K.zeros_like(hat_inputs[:, :, :, 0]) print(self.routings) for i in range(self.routings): c = K.softmax(b, 1) o = self.activation(K.batch_dot(c, hat_inputs, [2, 2])) if i < self.routings - 1: b = K.batch_dot(o, hat_inputs, [2, 3]) if K.backend() == 'theano': o = K.sum(o, axis=1) return o
def call(self, X): if type(X) is not list or len(X) != 2: raise Exception("GaussianAttention must be called on a list of two tensors. Got: " + str(X)) frame, position = X[0], X[1] # Reshaping the input to exclude the time dimension frameShape = K.shape(frame) positionShape = K.shape(position) (chans, height, width) = frameShape[-3:] targetDim = positionShape[-1] frame = K.reshape(frame, (-1, chans, height, width)) position = K.reshape(position, (-1, ) + (targetDim, )) cx = (position[:, 0] + position[:, 2]) / 2.0 cy = (position[:, 1] + position[:, 3]) / 2.0 sx = (position[:, 2] - cx) * 0.60 sy = (position[:, 3] - cy) * 0.60 rX = Data.linspace(-1.0, 1.0, width) rY = Data.linspace(-1.0, 1.0, height) FX = K.exp(-(rX - cx.dimshuffle(0, 'x')) ** 2 / (2.0 * (sx.dimshuffle(0, 'x') ** 2 + self.epsilon))) FY = K.exp(-(rY - cy.dimshuffle(0, 'x')) ** 2 / (2.0 * (sy.dimshuffle(0, 'x') ** 2 + self.epsilon))) m = (FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)) m = m + self.alpha m = m - K.greater(m, 1.0) * (m - 1.0) frame = frame * m.dimshuffle(0, 'x', 1, 2) # Reshaping the frame to include time dimension output = K.reshape(frame, frameShape) return output
def get_split_averages(input_tensor, input_mask, indices):
    # Splits input tensor into three parts based on the indices and
    # returns average of values prior to index, values at the index and
    # average of values after the index.
    # input_tensor: (batch_size, input_length, input_dim)
    # input_mask: (batch_size, input_length)
    # indices: (batch_size, 1)
    # (1, input_length)
    length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
    # (batch_size, input_length)
    batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
    tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)  # (batch_size, input_length)
    greater_mask = K.greater(batched_range, tiled_indices)  # (batch_size, input_length)
    lesser_mask = K.lesser(batched_range, tiled_indices)  # (batch_size, input_length)
    equal_mask = K.equal(batched_range, tiled_indices)  # (batch_size, input_length)

    # We also need to mask these masks using the input mask.
    # (batch_size, input_length)
    if input_mask is not None:
        greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
        lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))

    post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
    pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
    values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)

    post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
    pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)

    return K.cast(pre_sum / pre_normalizer, 'float32'), values_at_indices, K.cast(post_sum / post_normalizer, 'float32')
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None,
                           activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b is not None:  # explicit None check; a bare `if b:` fails on tensors
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
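# Usage sketch (not from the source): the shapes, names and values below are
# illustrative assumptions; it only checks that the same dense weights are
# applied to every timestep of a (batch, timesteps, input_dim) tensor.
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(2, 5, 3))   # (batch, timesteps, input_dim)
w = K.variable(np.random.rand(3, 4))      # (input_dim, output_dim)
b = K.variable(np.zeros(4))
y = time_distributed_dense(x, w, b, input_dim=3, output_dim=4, timesteps=5,
                           activation='tanh')
print(K.eval(y).shape)  # (2, 5, 4)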
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
def __call__(self, loss):
    output = self.layer.get_output(True)
    batch_size = K.shape(output)[0] // 2
    generated = output[:batch_size, :, :, :]
    loss += self.weight * K.mean(
        K.sum(K.square(gram_matrix(self.target) - gram_matrix(generated)), axis=(1, 2))
    ) / (4.0 * K.square(K.prod(K.shape(generated)[1:])))
    return loss
def buildMixModel(img_channels=3, lr=0.01, weight_decay=1e-7, loss='mse',
                  activ='relu', last_activ='sigmoid'):
    # just build a tiny fcn model, you can use more layers and more filters as you want
    main_input = Input(shape=(img_channels, None, None), name='input')
    conv_1 = Convolution2D(4, 3, 3, border_mode='same', activation=activ,
                           init='orthogonal', name='conv_1',
                           W_regularizer=l2(weight_decay),
                           b_regularizer=l2(weight_decay))(main_input)
    max_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)

    conv_2 = Convolution2D(8, 3, 3, border_mode='same', activation=activ,
                           init='orthogonal', name='conv_2',
                           W_regularizer=l2(weight_decay),
                           b_regularizer=l2(weight_decay))(max_1)
    max_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
    dp_0 = Dropout(0.25)(max_2)

    conv_3 = Convolution2D(16, 3, 3, border_mode='same', activation=activ,
                           init='orthogonal', name='conv_3',
                           W_regularizer=l2(weight_decay),
                           b_regularizer=l2(weight_decay))(dp_0)  # 25
    max_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)  # 12

    conv_4 = Convolution2D(32, 3, 3, border_mode='same', activation=activ,
                           init='orthogonal', name='conv_4',
                           W_regularizer=l2(weight_decay),
                           b_regularizer=l2(weight_decay))(max_3)  # 12
    max_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)  # 12
    dp_1 = Dropout(0.25)(max_4)

    conv_5 = Convolution2D(64, 3, 3, border_mode='same', activation=activ,
                           init='orthogonal', name='conv_5',
                           W_regularizer=l2(weight_decay),
                           b_regularizer=l2(weight_decay))(dp_1)  # 6

    upsamp_0 = UpSampling2D((2, 2))(conv_5)
    resize_0 = Resize2D(K.shape(conv_4))(upsamp_0)
    deconv_0 = Convolution2D(32, 3, 3, border_mode='same', activation=activ,
                             init='orthogonal', name='deconv_0',
                             W_regularizer=l2(weight_decay),
                             b_regularizer=l2(weight_decay))(resize_0)
    dp_2 = Dropout(0.25)(deconv_0)

    upsamp_1 = UpSampling2D((2, 2))(dp_2)
    resize_1 = Resize2D(K.shape(conv_3))(upsamp_1)
    deconv_1 = Convolution2D(16, 3, 3, border_mode='same', activation=activ,
                             init='orthogonal', name='deconv_1',
                             W_regularizer=l2(weight_decay),
                             b_regularizer=l2(weight_decay))(resize_1)

    upsamp_2 = UpSampling2D((2, 2))(deconv_1)
    resize_2 = Resize2D(K.shape(conv_2))(upsamp_2)
    deconv_2 = Convolution2D(8, 3, 3, border_mode='same', activation=activ,
                             init='orthogonal', name='deconv_2',
                             W_regularizer=l2(weight_decay),
                             b_regularizer=l2(weight_decay))(resize_2)
    dp_3 = Dropout(0.25)(deconv_2)

    upsamp_3 = UpSampling2D((2, 2))(dp_3)
    resize_3 = Resize2D(K.shape(conv_1))(upsamp_3)
    deconv_3 = Convolution2D(4, 3, 3, border_mode='same', activation=activ,
                             init='orthogonal', name='deconv_3',
                             W_regularizer=l2(weight_decay),
                             b_regularizer=l2(weight_decay))(resize_3)

    last_conv = Convolution2D(1, 3, 3, border_mode='same', activation=last_activ,
                              init='orthogonal', name='output_mask',
                              W_regularizer=l2(weight_decay),
                              b_regularizer=l2(weight_decay))(deconv_3)

    model = Model(input=[main_input], output=[last_conv])
    #opt = SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
    #opt = Adadelta(lr=lr, rho=0.95, epsilon=1e-06, clipvalue=10)
    opt = adam(lr=lr)
    model.compile(loss={'output_mask': loss}, optimizer=opt)
    return model
def Skew(inputs):
    inputs_ = K.permute_dimensions(inputs, (3, 0, 1, 2))
    buffer_ = T.zeros((K.shape(inputs)[3], K.shape(inputs)[0],
                       K.shape(inputs)[1] + K.shape(inputs)[3] - 1,
                       K.shape(inputs)[2]))

    def fnc(buf, inp, i):
        return T.set_subtensor(buf[:, i:i + K.shape(inputs)[1], :], inp[:, :, :])

    res, _ = theano.scan(fn=fnc, sequences=[buffer_, inputs_, T.arange(K.shape(inputs)[3])])
    res = K.permute_dimensions(res, (1, 2, 3, 0))
    return res
def make_patches(x, patch_size, patch_stride):
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
                           (patch_size, patch_size), (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1],
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.l2_normalize(patches, 1)
    return patches, patches_norm
def _transform(self, X, affine_transformation, output_size):
    batch_size, num_channels = K.shape(X)[0], K.shape(X)[3]
    transformations = K.reshape(affine_transformation, shape=(batch_size, 2, 3))
    # transformations = K.cast(affine_transformation[:, 0:2, :], 'float32')
    regular_grids = self._make_regular_grids(batch_size, *output_size)
    sampled_grids = K.batch_dot(transformations, regular_grids)
    interpolated_image = self._interpolate(X, sampled_grids, output_size)
    new_shape = (batch_size, output_size[0], output_size[1], num_channels)
    interpolated_image = K.reshape(interpolated_image, new_shape)
    return interpolated_image
def make_patches(x, shape):
    x = K.expand_dims(x, 0)
    patches = images2neibs(x, (shape, shape))
    # integer division so the reshape target stays integral
    patches = K.reshape(patches,
                        (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1],
                         shape, shape))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1, 2, 3), keepdims=True))
    return patches, patches_norm
def make_patches(x, patch_size, patch_stride):
    '''Break image `x` up into a bunch of patches.'''
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
                           (patch_size, patch_size), (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1],
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1, 2, 3), keepdims=True))
    return patches, patches_norm
def call(self, x, mask=None):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
    position_j = 1. / K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)
    # K.arange does not support a variable length, so build the position
    # indices with a cumulative sum instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
def call(self, X, mask=None):
    # 1D -> 2D
    batch = K.shape(X)[0]
    width = deconv_output_length(K.shape(X)[1], self.filter_length,
                                 self.padding, self.strides[2])
    print("Output width: ", width)
    print("Input shape: ", K.shape(X))
    X = K.expand_dims(X, 2)
    print("Input shape after expand: ", K.shape(X))
    # X = K.permute_dimensions(X, (0, 2, 3, 1))
    X = K.permute_dimensions(X, (0, 2, 1, 3))
    print("Input shape after permute: ", K.shape(X))
    deconv_shape = tf.pack([batch, 1, width, self.nb_filter])  # tf.pack was renamed tf.stack in TF 1.0
    print("Deconv shape: ", deconv_shape)
    conv_out = tf.nn.conv2d_transpose(X, self.W, strides=self.strides,
                                      padding=self.padding.upper(),
                                      output_shape=deconv_shape)
    output = conv_out + K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
    print("Output shape: ", K.shape(output))
    # output = K.permute_dimensions(output, (0, 3, 1, 2))
    output = K.permute_dimensions(output, (0, 2, 1, 3))
    print("Output shape after permute: ", K.shape(output))
    # 2D -> 1D
    output = K.squeeze(output, 2)
    print("Output shape after squeeze: ", K.shape(output))
    return output
def loss(y_true, y_pred):
    from plasma.conf import conf
    fac = MaxHingeTarget.fac
    #overall_fac = np.prod(np.array(K.shape(y_pred)[1:]).astype(np.float32))
    overall_fac = K.prod(K.cast(K.shape(y_pred)[1:], K.floatx()))
    max_val = K.max(y_pred, axis=-2)  # temporal axis!
    max_val1 = K.repeat(max_val, K.shape(y_pred)[-2])
    mask = K.cast(K.equal(max_val1, y_pred), K.floatx())
    y_pred1 = mask * y_pred + (1 - mask) * y_true
    weight_mask = K.mean(y_true, axis=-1)
    weight_mask = K.cast(K.greater(weight_mask, 0.0), K.floatx())  # positive label!
    weight_mask = fac * weight_mask + (1 - weight_mask)
    #return weight_mask * squared_hinge(y_true, y_pred1)
    return conf['model']['loss_scale_factor'] * overall_fac * weight_mask * hinge(y_true, y_pred1)
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
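# Usage sketch (not from the source): wiring `sampling` into a VAE encoder via
# a Lambda layer. The layer sizes and names below are illustrative assumptions.
from keras.layers import Input, Dense, Lambda

latent_dim = 2
encoder_inputs = Input(shape=(784,))
h = Dense(256, activation='relu')(encoder_inputs)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
# Lambda wraps the reparameterization so it runs inside the Keras graph.
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])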
def call(self, inputs, **kwargs):
    assert isinstance(inputs, list) and len(inputs) == 3
    first, second, features = inputs[0], inputs[1], inputs[2]
    if not self.from_logits:
        first = kb.clip(first, 1e-10, 1.0)
        second = kb.clip(second, 1e-10, 1.0)
        first_, second_ = kb.log(first), kb.log(second)
    else:
        first_, second_ = first, second
    # embedded_features.shape = (M, T, 1)
    if self.use_intermediate_layer:
        features = kb.dot(features, self.first_kernel)
        features = kb.bias_add(features, self.first_bias, data_format="channels_last")
        features = self.intermediate_activation(features)
    embedded_features = kb.dot(features, self.features_kernel)
    embedded_features = kb.bias_add(
        embedded_features, self.features_bias, data_format="channels_last")
    if self.use_dimension_bias:
        tiling_shape = [1] * (kb.ndim(first) - 1) + [kb.shape(first)[-1]]
        embedded_features = kb.tile(embedded_features, tiling_shape)
        embedded_features = kb.bias_add(
            embedded_features, self.dimensions_bias, data_format="channels_last")
    sigma = kb.sigmoid(embedded_features)

    result = weighted_sum(first_, second_, sigma,
                          self.first_threshold, self.second_threshold)
    probs = kb.softmax(result)
    if self.return_logits:
        return [probs, result]
    return probs
def call(self, x, mask=None):
    input_shape = K.shape(x)

    if self.dim_ordering == 'th':
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        num_rows = input_shape[1]
        num_cols = input_shape[2]

    row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
    col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

    outputs = []

    if self.dim_ordering == 'th':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]

                    x_crop = x[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)

    elif self.dim_ordering == 'tf':
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]

                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(1, 2))
                    outputs.append(pooled_val)

    outputs = K.concatenate(outputs)
    return outputs
def _loss_tensor(y_true, y_pred):
    max_val = K.max(y_pred, axis=-2)  # temporal axis!
    max_val = K.repeat(max_val, K.shape(y_pred)[-2])
    print(K.eval(max_val))
    mask = K.cast(K.equal(max_val, y_pred), K.floatx())
    y_pred = mask * y_pred + (1 - mask) * y_true
    return squared_hinge(y_true, y_pred)
def call(self, X, mask=None):
    input_shape = self.input_spec[0].shape
    x = K.reshape(X[0], (-1, input_shape[2]))
    target = X[1].flatten() if self.trainable else None
    Y = h_softmax(x, K.shape(x)[0], self.output_dim, self.n_classes,
                  self.n_outputs_per_class, self.W1, self.b1, self.W2, self.b2, target)
    output_dim = 1 if self.trainable else self.output_dim
    input_length = K.shape(X[0])[1]
    y = K.reshape(Y, (-1, input_length, output_dim))
    return y
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x, mask=None):
    if hasattr(x, '_keras_shape'):
        input_shape = x._keras_shape
    else:
        input_shape = self._input_shape
    #import pdb
    #pdb.set_trace()
    #if self.last_two is not None:
    #    last2 = self.last_two
    #else:
    #    input_shape = x._keras_shape
    #    last2 = input_shape[-2:]
    #out_shape = K.shape(x)[:-2]
    x = K.reshape(x, (-1,) + input_shape[-2:])  # (batch * d1 * ... * dn-2, dn-1, dn)
    if mask is not None:
        mask_shape = (K.shape(x)[0], -1)
        mask = K.reshape(mask, mask_shape)  # give it the same first dim
    y = self.layer.call(x, mask)
    #try:
    #    output_shape = self.get_output_shape_for(K.shape(x))
    #except:
    output_shape = self.get_output_shape_for(input_shape)
    #import pdb
    #pdb.set_trace()
    return K.cast(K.reshape(y, output_shape), K.floatx())
def image_categorical_crossentropy(output, target, from_logits=False):
    output = T.clip(output, _EPSILON, 1.0 - _EPSILON)
    output_ = K.reshape(output, (-1, 256))
    target_ = K.reshape(target, (-1, 256))
    out = T.nnet.categorical_crossentropy(output_, target_)
    out = K.reshape(out, (K.shape(output)[0], -1))
    return T.mean(T.mean(out, axis=1))
def batch_gather(reference, indices):
    """
    C+P From Keras pull request https://github.com/keras-team/keras/pull/6377/files

    Batchwise gathering of row indices.

    The numpy equivalent is `reference[np.arange(batch_size), indices]`, where
    `batch_size` is the first dimension of the reference tensor.

    # Arguments
        reference: A tensor with ndim >= 2 of shape
          (batch_size, dim1, dim2, ..., dimN)
        indices: A 1d integer tensor of shape (batch_size) satisfying
          0 <= i < dim2 for each element i.

    # Returns
        The selected tensor with shape (batch_size, dim2, ..., dimN).

    # Examples
        1. If reference is `[[3, 5, 7], [11, 13, 17]]` and indices is `[2, 1]`
        then the result is `[7, 13]`.

        2. If reference is
        ```
          [[[2, 3], [4, 5], [6, 7]],
           [[10, 11], [12, 13], [16, 17]]]
        ```
        and indices is `[2, 1]` then the result is `[[6, 7], [12, 13]]`.
    """
    batch_size = K.shape(reference)[0]
    indices = tf.stack([tf.range(batch_size), indices], axis=1)
    return tf.gather_nd(reference, indices)
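# Usage sketch (not from the source; assumes `batch_gather` is in scope and the
# TensorFlow backend is active). It mirrors the docstring's first example.
from keras import backend as K

reference = K.constant([[3, 5, 7], [11, 13, 17]], dtype='int32')
indices = K.constant([2, 1], dtype='int32')
print(K.eval(batch_gather(reference, indices)))  # -> [ 7 13]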
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon
def call(self, x, mask=None):
    # We assume the tuples are SVO and each slot is represented as vector.
    # Moreover, we assume each answer option is encoded as a single vector.
    # knowledge_embedding: (batch_size, num_tuples, tuple_size, embed_dim)
    # question_embedding: (batch_size, question_length, embed_dim)
    # answer_embedding: (batch_size, num_options, embed_dim)
    knowledge_embedding, question_embedding, answer_embedding = x
    if mask is None:
        knowledge_mask = question_mask = answer_mask = None
    else:
        knowledge_mask, question_mask, answer_mask = mask
    if knowledge_mask is None:
        sv_knowledge_mask = vo_knowledge_mask = subj_knowledge_mask = obj_knowledge_mask = None
    else:
        # Take out the relevant parts for each part of the tuple and reshape SV and VO masks using
        # batch_flatten.
        # (batch_size, num_tuples*2)
        sv_knowledge_mask = K.batch_flatten(knowledge_mask[:, :, :2])
        # (batch_size, num_tuples*2)
        vo_knowledge_mask = K.batch_flatten(knowledge_mask[:, :, 1:])
        # (batch_size, num_tuples)
        subj_knowledge_mask = knowledge_mask[:, :, 0]
        # (batch_size, num_tuples)
        obj_knowledge_mask = knowledge_mask[:, :, 2]
    batch_size = K.shape(knowledge_embedding)[0]
    sv_knowledge = K.reshape(
        knowledge_embedding[:, :, :2, :],
        (batch_size, self.num_tuples * 2, self.input_dim))
    vo_knowledge = K.reshape(
        knowledge_embedding[:, :, 1:, :],
        (batch_size, self.num_tuples * 2, self.input_dim))
    # (batch_size, num_tuples, embed_dim)
    subj_knowledge = knowledge_embedding[:, :, 0, :]
    # (batch_size, num_tuples, embed_dim)
    obj_knowledge = knowledge_embedding[:, :, 2, :]

    ## Step A1: Align SV with question.
    # Source is question, target is SV knowledge
    # (batch_size, question_length, num_tuples*2)
    sv_question_knowledge_alignment = self._align(
        question_embedding, sv_knowledge, question_mask, sv_knowledge_mask,
        normalize_alignment=False)
    # Sum probabilities over S and V slots. This is still a valid probability distribution.
    # (batch_size, question_length, num_tuples)
    sv_question_tuple_weights = K.sum(K.reshape(
        sv_question_knowledge_alignment,
        (batch_size, self.question_length, self.num_tuples, 2)), axis=-1)
    # Average over question length. This is essentially the weights of tuples based on how well their
    # S and V slots align to any word in the question.
    # Insight: This is essentially \sum_{i} p_align(tuple | q_word_i) * p_imp(q_word_i), where q_word_i is
    # the ith word in the question, p_align is the alignment weight and p_imp is the importance of the
    # question word, and p_imp is uniform.
    # (batch_size, num_tuples)
    sv_tuple_weights = K.mean(sv_question_tuple_weights, axis=1)

    ## Step A2: Align answer with Obj.
    # Source is obj knowledge, target is answer
    # (batch_size, num_tuples, num_options)
    obj_knowledge_answer_alignment = self._align(
        obj_knowledge, answer_embedding, obj_knowledge_mask, answer_mask,
        normalize_alignment=False)
    # (batch_size, num_tuples, num_options)
    tiled_sv_tuple_weights = K.dot(K.expand_dims(sv_tuple_weights), K.ones((1, self.num_options)))
    # Now we compute a weighted average over the tuples dimension, with the weights coming from how well
    # the tuples align with the question.
    # (batch_size, num_options)
    obj_answer_weights = K.sum(tiled_sv_tuple_weights * obj_knowledge_answer_alignment, axis=1)

    # Following steps are similar to what we did so far. Just substitute VO for SV and S for O.
    ## Step B1: Align VO with question
    vo_question_knowledge_alignment = self._align(
        question_embedding, vo_knowledge, question_mask, vo_knowledge_mask,
        normalize_alignment=False)
    vo_question_tuple_weights = K.sum(K.reshape(
        vo_question_knowledge_alignment,
        (batch_size, self.question_length, self.num_tuples, 2)), axis=-1)
    vo_tuple_weights = K.mean(vo_question_tuple_weights, axis=1)

    ## Step B2: Align answer with Subj
    subj_knowledge_answer_alignment = self._align(
        subj_knowledge, answer_embedding, subj_knowledge_mask, answer_mask,
        normalize_alignment=False)
    tiled_vo_tuple_weights = K.dot(K.expand_dims(vo_tuple_weights), K.ones((1, self.num_options)))
    subj_answer_weights = K.sum(tiled_vo_tuple_weights * subj_knowledge_answer_alignment, axis=1)

    # We now select the element wise max of obj_answer_weights and subj_answer_weights as our final weights.
    # (batch_size, num_options)
    max_answer_weights = switch(
        K.greater(obj_answer_weights, subj_answer_weights),
        obj_answer_weights, subj_answer_weights)

    # Renormalizing max weights.
    return K.softmax(max_answer_weights)
def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
      Simply return the input `mask`. (An rnn-based implementation with
      more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
      If the output mask at each time step is not `None`:
        (E.g., inner layer is Masking or RNN)
        Concatenate all of them and return the concatenation.
      If the output mask at each time step is `None` and the input mask is not
      `None`: (E.g., inner layer is Dense)
        Reduce the input_mask to 2 dimensions and return it.
      Otherwise (both the output mask and the input mask are `None`):
        (E.g., `mask` is not used at all)
        Return `None`.

    Args:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available for
        "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer with the shape beyond the
      second dimension being the value of the input mask shape (if the computed
      output mask is none), an output mask with the shape beyond the first
      dimension being the value of the mask shape (if mask is not None) or
      output mask with the shape beyond the first dimension being the value of
      the computed output shape.
    """
    # cases need to call the layer.compute_mask when input_mask is None:
    # Masking layer and Embedding layer with mask_zero
    input_shape = tf.nest.map_structure(
        lambda x: tf.TensorShape(K.int_shape(x)), inputs)
    input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
    batch_size = tf_utils.convert_shapes(input_shape)
    batch_size = tf.nest.flatten(batch_size)[0]
    is_ragged_input = tf.nest.map_structure(
        lambda x: isinstance(x, tf.RaggedTensor), inputs)
    is_ragged_input = generic_utils.to_list(
        tf.nest.flatten(is_ragged_input))
    if batch_size and not self._always_use_reshape or any(is_ragged_input):
        # batch size matters, we currently do not handle mask explicitly, or if
        # the layer always uses reshape approach, or the input is a ragged tensor.
        return mask
    inner_mask = mask
    if inner_mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        inner_mask = K.reshape(inner_mask, inner_mask_shape)
    inner_input_shape = tf.nest.map_structure(
        lambda tensor: self._get_shape_tuple((-1,), tensor, 2), inputs)
    inner_inputs = tf.__internal__.nest.map_structure_up_to(
        inputs, tf.reshape, inputs, inner_input_shape)
    output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
    if output_mask is None:
        if mask is None:
            return None
        # input_mask is not None, and output_mask is None:
        # we should return a not-None mask
        output_mask = mask
        for _ in range(2, len(K.int_shape(mask))):
            output_mask = K.any(output_mask, axis=-1)
    else:
        # output_mask is not None.
        # We need to reshape it.
        input_length = tf_utils.convert_shapes(input_shape)
        input_length = tf.nest.flatten(input_length)[1]
        if not input_length:
            input_length = tf.nest.map_structure(lambda x: K.shape(x)[1], inputs)
            input_length = tf.nest.flatten(input_length)[0]
        output_mask_int_shape = K.int_shape(output_mask)
        if output_mask_int_shape is None:
            # if the output_mask does not have a static shape,
            # its shape must be the same as mask's
            if mask is not None:
                output_mask_int_shape = K.int_shape(mask)
            else:
                input_shape = generic_utils.to_list(
                    tf.nest.flatten(input_shape))[0]
                output_mask_int_shape = K.compute_output_shape(input_shape)[:-1]
        output_mask_shape = self._get_shape_tuple(
            (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
        output_mask = K.reshape(output_mask, output_mask_shape)
    return output_mask
def repeat_output(input):
    # assumes the shape should come from the `input` argument;
    # the snippet originally referenced an undefined name `x`
    shape = K.shape(input)
    return K.reshape(K.repeat(input, 4 * 4), (shape[0], 4, 4, 256))
def complex_standardization(input_centred, Vrr, Vii, Vri,
                            layernorm=False, axis=-1):
    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    variances_broadcast = [1] * ndim
    variances_broadcast[axis] = input_dim
    if layernorm:
        variances_broadcast[0] = K.shape(input_centred)[0]

    # We require the covariance matrix's inverse square root. That first requires
    # square rooting, followed by inversion (I do this in that order because during
    # the computation of square root we compute the determinant we'll need for
    # inversion as well).

    # tau = Vrr + Vii = Trace. Guaranteed >= 0 because SPD
    tau = Vrr + Vii
    # delta = (Vrr * Vii) - (Vri ** 2) = Determinant. Guaranteed >= 0 because SPD
    delta = (Vrr * Vii) - (Vri ** 2)

    # s = np.sqrt(delta) # Determinant of square root matrix
    # t = np.sqrt(tau + 2 * s)
    s = K.sqrt(delta)  # Determinant of square root matrix
    t = K.sqrt(tau + 2 * s)

    # The square root matrix could now be explicitly formed as
    #       [ Vrr+s   Vri   ]
    # (1/t) [ Vir     Vii+s ]
    # https://en.wikipedia.org/wiki/Square_root_of_a_2_by_2_matrix
    # but we don't need to do this immediately since we can also simultaneously
    # invert. We can do this because we've already computed the determinant of
    # the square root matrix, and can thus invert it using the analytical
    # solution for 2x2 matrices
    #      [ A B ]             [  D -B ]
    # inv( [ C D ] ) = (1/det) [ -C  A ]
    # http://mathworld.wolfram.com/MatrixInverse.html
    # Thus giving us
    #            [  Vii+s  -Vri   ]
    # (1/s)(1/t) [ -Vir     Vrr+s ]
    # So we proceed as follows:

    inverse_st = 1.0 / (s * t)
    Wrr = (Vii + s) * inverse_st
    Wii = (Vrr + s) * inverse_st
    Wri = -Vri * inverse_st

    # And we have computed the inverse square root matrix W = sqrt(V)!
    # Normalization. We multiply, x_normalized = W.x.

    # The returned result will be a complex standardized input
    # where the real and imaginary parts are obtained as follows:
    # x_real_normed = Wrr * x_real_centred + Wri * x_imag_centred
    # x_imag_normed = Wri * x_real_centred + Wii * x_imag_centred

    broadcast_Wrr = K.reshape(Wrr, variances_broadcast)
    broadcast_Wri = K.reshape(Wri, variances_broadcast)
    broadcast_Wii = K.reshape(Wii, variances_broadcast)

    cat_W_4_real = K.concatenate([broadcast_Wrr, broadcast_Wii], axis=axis)
    cat_W_4_imag = K.concatenate([broadcast_Wri, broadcast_Wri], axis=axis)

    if (axis == 1 and ndim != 3) or ndim == 2:
        centred_real = input_centred[:, :input_dim]
        centred_imag = input_centred[:, input_dim:]
    elif ndim == 3:
        centred_real = input_centred[:, :, :input_dim]
        centred_imag = input_centred[:, :, input_dim:]
    elif axis == -1 and ndim == 4:
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]
    elif axis == -1 and ndim == 5:
        centred_real = input_centred[:, :, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, :, input_dim:]
    else:
        raise ValueError(
            'Incorrect Batchnorm combination of axis and dimensions. '
            'axis should be either 1 or -1. '
            'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
    rolled_input = K.concatenate([centred_imag, centred_real], axis=axis)

    output = cat_W_4_real * input_centred + cat_W_4_imag * rolled_input

    #   Wrr * x_real_centered | Wii * x_imag_centered
    # + Wri * x_imag_centered | Wri * x_real_centered
    # -----------------------------------------------
    # = output

    return output
def _scharr_edges(cls, image, magnitude):
    """ Returns a tensor holding modified Scharr edge maps.

    Parameters
    ----------
    image: tensor
        Image tensor with shape [batch_size, h, w, d] and type float32.
        The image(s) must be 2x2 or larger.
    magnitude: bool
        Boolean to determine if the edge magnitude or edge direction is returned

    Returns
    -------
    tensor
        Tensor holding edge maps for each channel. Returns a tensor with shape
        `[batch_size, h, w, d, 2]` where the last two dimensions hold
        `[[dy[0], dx[0]], [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]]` calculated
        using the Scharr filter.
    """
    # Define vertical and horizontal Scharr filters.
    # TODO PlaidML: AttributeError: 'Value' object has no attribute 'get_shape'
    static_image_shape = image.get_shape()
    image_shape = K.shape(image)

    # 5x5 modified Scharr kernel ( reshape to (5,5,1,2) )
    matrix = np.array([[[[0.00070, 0.00070]],
                        [[0.00520, 0.00370]],
                        [[0.03700, 0.00000]],
                        [[0.00520, -0.0037]],
                        [[0.00070, -0.0007]]],
                       [[[0.00370, 0.00520]],
                        [[0.11870, 0.11870]],
                        [[0.25890, 0.00000]],
                        [[0.11870, -0.1187]],
                        [[0.00370, -0.0052]]],
                       [[[0.00000, 0.03700]],
                        [[0.00000, 0.25890]],
                        [[0.00000, 0.00000]],
                        [[0.00000, -0.2589]],
                        [[0.00000, -0.0370]]],
                       [[[-0.0037, 0.00520]],
                        [[-0.1187, 0.11870]],
                        [[-0.2589, 0.00000]],
                        [[-0.1187, -0.1187]],
                        [[-0.0037, -0.0052]]],
                       [[[-0.0007, 0.00070]],
                        [[-0.0052, 0.00370]],
                        [[-0.0370, 0.00000]],
                        [[-0.0052, -0.0037]],
                        [[-0.0007, -0.0007]]]])
    num_kernels = [2]
    kernels = K.constant(matrix, dtype='float32')
    kernels = K.tile(kernels, [1, 1, image_shape[-1], 1])

    # Use depth-wise convolution to calculate edge maps per channel.
    # Output tensor has shape [batch_size, h, w, d * num_kernels].
    pad_sizes = [[0, 0], [2, 2], [2, 2], [0, 0]]
    padded = pad(image, pad_sizes, mode='REFLECT')
    output = K.depthwise_conv2d(padded, kernels)

    if not magnitude:  # direction of edges
        # Reshape to [batch_size, h, w, d, num_kernels].
        shape = K.concatenate([image_shape, num_kernels], axis=0)
        output = K.reshape(output, shape=shape)
        output.set_shape(static_image_shape.concatenate(num_kernels))
        output = tf.atan(K.squeeze(output[:, :, :, :, 0] / output[:, :, :, :, 1], axis=None))
    # magnitude of edges -- unified x & y edges don't work well with Neural Networks
    return output
def _interpolate(self, image, sampled_grids, output_size):
    batch_size = K.shape(image)[0]
    height = K.shape(image)[1]
    width = K.shape(image)[2]
    num_channels = K.shape(image)[3]

    x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
    y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

    x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
    y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

    x0 = K.cast(x, 'int32')
    x1 = x0 + 1
    y0 = K.cast(y, 'int32')
    y1 = y0 + 1

    max_x = int(K.int_shape(image)[2] - 1)
    max_y = int(K.int_shape(image)[1] - 1)

    x0 = K.clip(x0, 0, max_x)
    x1 = K.clip(x1, 0, max_x)
    y0 = K.clip(y0, 0, max_y)
    y1 = K.clip(y1, 0, max_y)

    pixels_batch = K.arange(0, batch_size) * (height * width)
    pixels_batch = K.expand_dims(pixels_batch, axis=-1)
    flat_output_size = output_size[0] * output_size[1]
    base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
    base = K.flatten(base)

    # base_y0 = base + (y0 * width)
    base_y0 = y0 * width
    base_y0 = base + base_y0
    # base_y1 = base + (y1 * width)
    base_y1 = y1 * width
    base_y1 = base_y1 + base

    indices_a = base_y0 + x0
    indices_b = base_y1 + x0
    indices_c = base_y0 + x1
    indices_d = base_y1 + x1

    flat_image = K.reshape(image, shape=(-1, num_channels))
    flat_image = K.cast(flat_image, dtype='float32')
    pixel_values_a = K.gather(flat_image, indices_a)
    pixel_values_b = K.gather(flat_image, indices_b)
    pixel_values_c = K.gather(flat_image, indices_c)
    pixel_values_d = K.gather(flat_image, indices_d)

    x0 = K.cast(x0, 'float32')
    x1 = K.cast(x1, 'float32')
    y0 = K.cast(y0, 'float32')
    y1 = K.cast(y1, 'float32')

    area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
    area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
    area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
    area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

    values_a = area_a * pixel_values_a
    values_b = area_b * pixel_values_b
    values_c = area_c * pixel_values_c
    values_d = area_d * pixel_values_d

    return values_a + values_b + values_c + values_d
split = SplitVector()
couple = AddCouple()
concat = ConcatVector()
scale = Scale()
basic_model_1 = build_basic_model(original_dim // 2)
basic_model_2 = build_basic_model(original_dim // 2)
basic_model_3 = build_basic_model(original_dim // 2)
basic_model_4 = build_basic_model(original_dim // 2)

x_in = Input(shape=(original_dim,))
x = x_in

# Add a small amount of noise to the input during training
x = Lambda(
    lambda s: K.in_train_phase(s - 0.01 * K.random_uniform(K.shape(s)), s))(x)

x = shuffle1(x)
x1, x2 = split(x)
mx1 = basic_model_1(x1)
x1, x2 = couple([x1, x2, mx1])
x = concat([x1, x2])

x = shuffle2(x)
x1, x2 = split(x)
mx1 = basic_model_2(x1)
x1, x2 = couple([x1, x2, mx1])
x = concat([x1, x2])

x = shuffle3(x)
x1, x2 = split(x)
def yolo_loss(args, anchors, n_classes, ignore_thresh=0.5):
    n_layers = len(args) // 2
    y_preds = args[:n_layers]   # [B,H,W,3*(4+1+c)], level0->level2
    y_trues = args[n_layers:]   # [B,H,W,3,4+1+c], level0->level2
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # big object---> P0
    input_shape = [y_trues[0]._keras_shape[i] * 32 for i in [1, 2]]
    grid_shapes = [y_trues[i]._keras_shape[1:3] for i in range(3)]

    loss = 0.
    xy_loss_, wh_loss_, ciou_loss_, conf_loss_, cls_loss_ = 0., 0., 0., 0., 0.
    m = K.shape(y_preds[0])[0]  # batch size
    mf = K.cast(m, K.dtype(y_preds[0]))  # batch size to float

    for i in range(n_layers):
        anchors_l = anchors[anchor_mask[i]]
        conf_gt = y_trues[i][..., 4:5]
        cls_gt = y_trues[i][..., 5:]

        feats = K.reshape(y_preds[i],
                          (-1, grid_shapes[i][0], grid_shapes[i][1],
                           len(anchors_l), 4 + 1 + n_classes))
        grid_coords, pred_xy, pred_wh, pred_conf, pred_cls = bbox(
            feats, anchors_l, n_classes, input_shape)  # normed
        pred_box = K.concatenate([pred_xy, pred_wh])  # normed

        conf_gt = y_trues[i][..., 4:5]
        cls_gt = y_trues[i][..., 5:]
        xy_gt = y_trues[i][..., :2] * grid_shapes[i][::-1] - grid_coords  # offset to grid
        wh_gt = K.log(y_trues[i][..., 2:4] * input_shape[::-1] / anchors_l)  # offset to grid
        # wh being too small would cause log(0)=-inf, in this case replace the infs with 0
        wh_gt = K.switch(conf_gt, wh_gt, K.zeros_like(wh_gt))
        box_loss_scale = 2 - y_trues[i][..., 2:3] * y_trues[i][..., 3:4]

        # box_loss: xy_loss+wh_loss / iou loss
        # xy_loss: bce, based on grid center
        xy_loss = conf_gt * box_loss_scale * K.binary_crossentropy(
            xy_gt, feats[..., 0:2], from_logits=True)
        # wh_loss: l2, based on anchor shape
        wh_loss = conf_gt * box_loss_scale * 0.5 * K.square(wh_gt - feats[..., 2:4])
        # ciou_loss: iou
        ciou = tf.expand_dims(bbox_ciou(y_trues[i][..., :4], pred_box, grid_coords,
                                        grid_shapes[i], input_shape, anchors_l), axis=-1)
        ciou_loss = conf_gt * box_loss_scale * (1 - ciou)

        # conf_loss: bce
        # ignore mask: objects on gt mask which has iou<ignore_thresh with anchors
        ignore_mask = tf.TensorArray(K.dtype(y_trues[0]), size=1, dynamic_size=True)  # dynamically sized array
        object_mask = tf.cast(conf_gt, tf.bool)

        def loop_body(b, ignore_mask):
            # flattened (h*w*a*mask) gt boxes for the current sample, [N,4]
            true_box = tf.boolean_mask(y_trues[i][b, ..., 0:4], object_mask[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)  # [H,W,a,N]
            best_iou = K.max(iou, axis=-1, keepdims=True)  # [H,W,a,1]
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()  # [b,H,W,a,1]

        conf_loss = conf_gt * K.binary_crossentropy(conf_gt, feats[..., 4:5], from_logits=True) + \
            (1 - conf_gt) * ignore_mask * K.binary_crossentropy(conf_gt, feats[..., 4:5], from_logits=True)
        # cls_loss: bce
        cls_loss = conf_gt * K.binary_crossentropy(cls_gt, feats[..., 5:], from_logits=True)

        # xy_loss_ += K.sum(xy_loss) / mf
        # wh_loss_ += K.sum(wh_loss) / mf
        ciou_loss_ += K.sum(ciou_loss) / mf  # accumulate the per-level loss (not the running total)
        conf_loss_ += K.sum(conf_loss) / mf
        cls_loss_ += K.sum(cls_loss) / mf

    # loss = xy_loss_ + wh_loss_ + conf_loss_ + cls_loss_
    loss = ciou_loss_ + conf_loss_ + cls_loss_
    # return loss
    return tf.stack([loss, xy_loss_, wh_loss_, conf_loss_, cls_loss_], axis=0)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes, input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss,
                            [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
def mytransform(source, ref_tensor):
    target_shape = K.shape(ref_tensor)
    source_shape = K.shape(source)
    return K.resize_images(source,
                           target_shape[1] / source_shape[1],
                           target_shape[2] / source_shape[2],
                           "channels_last")
def call(self, x, mask=None):
    return K.in_train_phase(
        K.relu(x, K.random_uniform(K.shape(x), self.l, self.u)),
        K.relu(x, self.average))
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
def yolo_loss(args, anchors, num_classes, rescore_confidence=False, print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
no_object_weights = (no_object_scale * (1 - object_detections) * (1 - detectors_mask)) no_objects_loss = no_object_weights * K.square(-pred_confidence) if rescore_confidence: objects_loss = (object_scale * detectors_mask * K.square(best_ious - pred_confidence)) else: objects_loss = (object_scale * detectors_mask * K.square(1 - pred_confidence)) confidence_loss = objects_loss + no_objects_loss # Classification loss for matching detections. # NOTE: YOLO does not use categorical cross-entropy loss here. matching_classes = K.cast(matching_true_boxes[..., 4], 'int32') matching_classes = K.one_hot(matching_classes, num_classes) classification_loss = (class_scale * detectors_mask * K.square(matching_classes - pred_class_prob)) # Coordinate loss for matching detection boxes. matching_boxes = matching_true_boxes[..., 0:4] coordinates_loss = (coordinates_scale * detectors_mask * K.square(matching_boxes - pred_boxes)) confidence_loss_sum = K.sum(confidence_loss) classification_loss_sum = K.sum(classification_loss) coordinates_loss_sum = K.sum(coordinates_loss) total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum + coordinates_loss_sum) if print_loss: total_loss = tf.Print( total_loss, [ total_loss, confidence_loss_sum, classification_loss_sum, coordinates_loss_sum ], message='yolo_loss, conf_loss, class_loss, box_coord_loss:') return total_loss
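# A small NumPy sanity check of the IoU arithmetic used above (same centre/size box
# format); the two boxes and the expected value are illustrative only.
import numpy as np

def iou_xywh(b1, b2):
    b1_xy, b1_wh = np.array(b1[:2]), np.array(b1[2:])
    b2_xy, b2_wh = np.array(b2[:2]), np.array(b2[2:])
    mins = np.maximum(b1_xy - b1_wh / 2., b2_xy - b2_wh / 2.)
    maxes = np.minimum(b1_xy + b1_wh / 2., b2_xy + b2_wh / 2.)
    wh = np.maximum(maxes - mins, 0.)
    intersection = wh[0] * wh[1]
    return intersection / (b1_wh.prod() + b2_wh.prod() - intersection)

# Two unit boxes whose centres are half a unit apart overlap by 0.5,
# so IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
assert abs(iou_xywh([0.5, 0.5, 1., 1.], [1.0, 0.5, 1., 1.]) - 1. / 3.) < 1e-6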
# Reparameterization trick def call(self, inputs): z, shift, log_scale = inputs z = K.exp(log_scale) * z + shift logdet = -K.sum(K.mean(log_scale, 0)) self.add_loss(logdet) return z # Compute the mean and variance of p(Z|X) z_shift = Dense(z_dim)(x) z_log_scale = Dense(z_dim)(x) # Reparameterization layer, equivalent to adding noise to the input u = Lambda(lambda z: K.random_normal(shape=K.shape(z)))(z_shift) z = ScaleShift()([u, z_shift, z_log_scale]) x_recon = decoder(z) x_out = Subtract()([x_in, x_recon]) # recon_loss is the reconstruction loss, z_loss is the KL loss recon_loss = 0.5 * K.sum(K.mean(x_out**2, 0)) + 0.5 * np.log( 2 * np.pi) * np.prod(K.int_shape(x_out)[1:]) z_loss = 0.5 * K.sum(K.mean(z**2, 0)) - 0.5 * K.sum(K.mean(u**2, 0)) vae_loss = recon_loss + z_loss vae = Model(x_in, x_out) vae.add_loss(vae_loss) vae.compile(optimizer=Adam(1e-4))
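# A hedged note on training this model: because the loss was attached with add_loss and
# compile() received no loss argument, fit() is called with the inputs only. `x_train`,
# `epochs` and `batch_size` are assumed names not defined in the snippet above.
vae.fit(x_train, shuffle=True, epochs=epochs, batch_size=batch_size)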
def yolo_head(feats, anchors, num_classes): """Convert final layer features to bounding box parameters. Parameters ---------- feats : tensor Final convolutional layer features. anchors : array-like Anchor box widths and heights. num_classes : int Number of target classes. Returns ------- box_xy : tensor x, y box predictions adjusted by spatial location in conv layer. box_wh : tensor w, h box predictions adjusted by anchors and conv spatial resolution. box_conf : tensor Probability estimate for whether each box contains any object. box_class_pred : tensor Probability distribution estimate for each box over class labels. """ num_anchors = len(anchors) # Reshape to batch, height, width, num_anchors, box_params. anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2]) # Static implementation for fixed models. # TODO: Remove or add option for static implementation. # _, conv_height, conv_width, _ = K.int_shape(feats) # conv_dims = K.variable([conv_width, conv_height]) # Dynamic implementation of conv dims for fully convolutional model. conv_dims = K.shape(feats)[1:3] # assuming channels last # In YOLO the height index is the innermost iteration. conv_height_index = K.arange(0, stop=conv_dims[0]) conv_width_index = K.arange(0, stop=conv_dims[1]) conv_height_index = K.tile(conv_height_index, [conv_dims[1]]) # TODO: K.repeat_elements and tf.split don't support dynamic splits. # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0) conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1]) conv_width_index = K.flatten(K.transpose(conv_width_index)) conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) conv_index = K.cast(conv_index, K.dtype(feats)) feats = K.reshape( feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats)) # Static generation of conv_index: # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)]) # conv_index = conv_index[:, [1, 0]] # swap columns for YOLO ordering. # conv_index = K.variable( # conv_index.reshape(1, conv_height, conv_width, 1, 2)) # feats = Reshape( # (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats) box_xy = K.sigmoid(feats[..., :2]) box_wh = K.exp(feats[..., 2:4]) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.softmax(feats[..., 5:]) # Adjust predictions to each spatial grid point and anchor size. # Note: YOLO iterates over height index before width index. box_xy = (box_xy + conv_index) / conv_dims box_wh = box_wh * anchors_tensor / conv_dims return box_xy, box_wh, box_confidence, box_class_probs
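# A hedged companion sketch: converting the (x, y) / (w, h) outputs of yolo_head to
# corner coordinates, the usual next step before score filtering and NMS.
def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert box predictions from centre/size form to (y_min, x_min, y_max, x_max)."""
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)
    return K.concatenate([
        box_mins[..., 1:2],   # y_min
        box_mins[..., 0:1],   # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]   # x_max
    ])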
def ComplexBN(input_centred, Vrr, Vii, Vri, beta, gamma_rr, gamma_ri, gamma_ii, scale=True, center=True, layernorm=False, axis=-1): ndim = K.ndim(input_centred) input_dim = K.shape(input_centred)[axis] // 2 if scale: gamma_broadcast_shape = [1] * ndim gamma_broadcast_shape[axis] = input_dim if center: broadcast_beta_shape = [1] * ndim broadcast_beta_shape[axis] = input_dim * 2 if scale: standardized_output = complex_standardization(input_centred, Vrr, Vii, Vri, layernorm, axis=axis) # Now we perform the scaling and shifting of the normalized x using # the scaling parameter # [ gamma_rr gamma_ri ] # Gamma = [ gamma_ri gamma_ii ] # and the shifting parameter # Beta = [beta_real beta_imag].T # where: # x_real_BN = gamma_rr * x_real_normed + gamma_ri * x_imag_normed + beta_real # x_imag_BN = gamma_ri * x_real_normed + gamma_ii * x_imag_normed + beta_imag broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape) broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape) broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape) cat_gamma_4_real = K.concatenate( [broadcast_gamma_rr, broadcast_gamma_ii], axis=axis) cat_gamma_4_imag = K.concatenate( [broadcast_gamma_ri, broadcast_gamma_ri], axis=axis) if (axis == 1 and ndim != 3) or ndim == 2: centred_real = standardized_output[:, :input_dim] centred_imag = standardized_output[:, input_dim:] elif ndim == 3: centred_real = standardized_output[:, :, :input_dim] centred_imag = standardized_output[:, :, input_dim:] elif axis == -1 and ndim == 4: centred_real = standardized_output[:, :, :, :input_dim] centred_imag = standardized_output[:, :, :, input_dim:] elif axis == -1 and ndim == 5: centred_real = standardized_output[:, :, :, :, :input_dim] centred_imag = standardized_output[:, :, :, :, input_dim:] else: raise ValueError( 'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. ' 'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.') rolled_standardized_output = K.concatenate( [centred_imag, centred_real], axis=axis) if center: broadcast_beta = K.reshape(beta, broadcast_beta_shape) return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output + broadcast_beta else: return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output else: if center: broadcast_beta = K.reshape(beta, broadcast_beta_shape) return input_centred + broadcast_beta else: return input_centred
print('Model loaded.') # Get the symbolic outputs of each "key" layer (we gave them unique names). layer_dict = dict([(layer.name, layer) for layer in model.layers]) # Define the loss. loss = K.variable(0.) for layer_name in settings['features']: # Add the L2 norm of the features of a layer to the loss. assert layer_name in layer_dict.keys(), 'Layer ' + layer_name + ' not found in model.' coeff = settings['features'][layer_name] x = layer_dict[layer_name].output # We avoid border artifacts by only involving non-border pixels in the loss. scaling = K.prod(K.cast(K.shape(x), 'float32')) if K.image_data_format() == 'channels_first': loss += coeff * K.sum(K.square(x[:, :, 2:-2, 2:-2])) / scaling else: loss += coeff * K.sum(K.square(x[:, 2:-2, 2:-2, :])) / scaling # Compute the gradients of the loss with respect to the dream (the generated image). grads = K.gradients(loss, dream)[0] # Normalize gradients. grads /= K.maximum(K.mean(K.abs(grads)), 1e-7) # Set up function to retrieve the value # of the loss and gradients given an input image. outputs = [loss, grads] fetch_loss_and_grads = K.function([dream], outputs)
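# A minimal gradient-ascent loop built on top of fetch_loss_and_grads, in the spirit of
# the usual Deep Dream recipe; `iterations`, `step` and `max_loss` are assumed settings
# not defined in the snippet above.
def gradient_ascent(x, iterations, step, max_loss=None):
    for i in range(iterations):
        loss_value, grad_values = fetch_loss_and_grads([x])
        if max_loss is not None and loss_value > max_loss:
            break
        print('..Loss value at', i, ':', loss_value)
        x += step * grad_values
    return x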
def __init__(self, model, momentum=0.9999): self.momentum = momentum self.model = model self.ema_weights = [K.zeros(K.shape(w)) for w in model.weights]
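# A hedged sketch of the companion methods such an EMA helper usually carries: register
# the moving-average updates on the model, then swap the averaged weights in and out
# around evaluation. The method names here are illustrative, not from the source.
def inject(self):
    """Initialise the shadow weights and add the EMA update ops to the model."""
    self.initialize()
    for w_ema, w in zip(self.ema_weights, self.model.weights):
        self.model.add_update(K.moving_average_update(w_ema, w, self.momentum))

def initialize(self):
    self.old_weights = K.batch_get_value(self.model.weights)
    K.batch_set_value(zip(self.ema_weights, self.old_weights))

def apply_ema_weights(self):
    """Back up the current weights and load the averaged ones (e.g. before evaluation)."""
    self.old_weights = K.batch_get_value(self.model.weights)
    K.batch_set_value(zip(self.model.weights, K.batch_get_value(self.ema_weights)))

def reset_old_weights(self):
    """Restore the weights saved by apply_ema_weights."""
    K.batch_set_value(zip(self.model.weights, self.old_weights))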
def call(self, inputs): z_mean, z_log_var = inputs epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.latent_dim)) return z_mean + K.exp(z_log_var / 2) * epsilon
def timedistributed_concat(packed): x, pic = packed return K.concatenate([x, K.repeat(pic, K.shape(x)[-2])], axis=-1)
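# A hedged usage sketch for the helper above: broadcasting a single per-sample image
# vector across every timestep of a sequence before concatenation. The tensor names
# (`sequence_features`, shape (batch, timesteps, d1); `image_vector`, shape (batch, d2))
# are illustrative assumptions, not from the source.
from keras.layers import Lambda

merged = Lambda(timedistributed_concat)([sequence_features, image_vector])
# merged now has shape (batch, timesteps, d1 + d2).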
def call(self, inputs, training=None, mask=None): input_shape = K.shape(inputs) if self.rank == 1: input_shape = [input_shape[i] for i in range(3)] batch_shape, dim, channels = input_shape xx_range = tf.tile(K.expand_dims(K.arange(0, dim), axis=0), K.stack([batch_shape, 1])) xx_range = K.expand_dims(xx_range, axis=-1) xx_channels = K.cast(xx_range, K.floatx()) xx_channels = xx_channels / K.cast(dim - 1, K.floatx()) xx_channels = (xx_channels * 2) - 1. outputs = K.concatenate([inputs, xx_channels], axis=-1) if self.rank == 2: if self.data_format == 'channels_first': inputs = K.permute_dimensions(inputs, [0, 2, 3, 1]) input_shape = [input_shape[i] for i in range(4)] batch_shape, dim1, dim2, channels = input_shape xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32') xx_ones = K.expand_dims(xx_ones, axis=-1) xx_range = tf.tile(K.expand_dims(K.arange(0, dim1), axis=0), K.stack([batch_shape, 1])) xx_range = K.expand_dims(xx_range, axis=1) xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1]) xx_channels = K.expand_dims(xx_channels, axis=-1) xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3]) yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32') yy_ones = K.expand_dims(yy_ones, axis=1) yy_range = tf.tile(K.expand_dims(K.arange(0, dim2), axis=0), K.stack([batch_shape, 1])) yy_range = K.expand_dims(yy_range, axis=-1) yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1]) yy_channels = K.expand_dims(yy_channels, axis=-1) yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3]) xx_channels = K.cast(xx_channels, K.floatx()) xx_channels = xx_channels / K.cast(dim1 - 1, K.floatx()) xx_channels = (xx_channels * 2) - 1. yy_channels = K.cast(yy_channels, K.floatx()) yy_channels = yy_channels / K.cast(dim2 - 1, K.floatx()) yy_channels = (yy_channels * 2) - 1. 
outputs = K.concatenate([inputs, xx_channels, yy_channels], axis=-1) if self.use_radius: rr = K.sqrt(K.square(xx_channels - 0.5) + K.square(yy_channels - 0.5)) outputs = K.concatenate([outputs, rr], axis=-1) if self.data_format == 'channels_first': outputs = K.permute_dimensions(outputs, [0, 3, 1, 2]) if self.rank == 3: if self.data_format == 'channels_first': inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1]) input_shape = [input_shape[i] for i in range(5)] batch_shape, dim1, dim2, dim3, channels = input_shape xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32') xx_ones = K.expand_dims(xx_ones, axis=-1) xx_range = tf.tile(K.expand_dims(K.arange(0, dim2), axis=0), K.stack([batch_shape, 1])) xx_range = K.expand_dims(xx_range, axis=1) xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1]) xx_channels = K.expand_dims(xx_channels, axis=-1) xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3]) xx_channels = K.expand_dims(xx_channels, axis=1) xx_channels = tf.tile(xx_channels, [1, dim1, 1, 1, 1]) yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32') yy_ones = K.expand_dims(yy_ones, axis=1) yy_range = tf.tile(K.expand_dims(K.arange(0, dim3), axis=0), K.stack([batch_shape, 1])) yy_range = K.expand_dims(yy_range, axis=-1) yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1]) yy_channels = K.expand_dims(yy_channels, axis=-1) yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3]) yy_channels = K.expand_dims(yy_channels, axis=1) yy_channels = tf.tile(yy_channels, [1, dim1, 1, 1, 1]) zz_range = tf.tile(K.expand_dims(K.arange(0, dim1), axis=0), K.stack([batch_shape, 1])) zz_range = K.expand_dims(zz_range, axis=-1) zz_range = K.expand_dims(zz_range, axis=-1) zz_channels = tf.tile(zz_range, [1, 1, dim2, dim3]) zz_channels = K.expand_dims(zz_channels, axis=-1) xx_channels = K.cast(xx_channels, K.floatx()) xx_channels = xx_channels / K.cast(dim2 - 1, K.floatx()) xx_channels = xx_channels * 2 - 1. yy_channels = K.cast(yy_channels, K.floatx()) yy_channels = yy_channels / K.cast(dim3 - 1, K.floatx()) yy_channels = yy_channels * 2 - 1. zz_channels = K.cast(zz_channels, K.floatx()) zz_channels = zz_channels / K.cast(dim1 - 1, K.floatx()) zz_channels = zz_channels * 2 - 1. outputs = K.concatenate([inputs, zz_channels, xx_channels, yy_channels], axis=-1) if self.data_format == 'channels_first': outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3]) return outputs
def sampling(args): z_mean, z_log_var = args epsilon = K.random_normal(shape=(K.shape(z_mean)[0], input_shape), mean=0., stddev=1.) return z_mean + K.exp(z_log_var / 2) * epsilon
def call(self, inputs, **kwargs): input_shape = K.shape(inputs) batch_size, seq_len = input_shape[0], input_shape[1] pos_embeddings = self.embeddings[:seq_len] pos_embeddings = K.expand_dims(pos_embeddings, 0) return inputs + pos_embeddings
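# A hedged sketch of the build() such a positional-embedding layer usually pairs with:
# a trainable (max_len, output_dim) table that call() slices to the current sequence
# length. `max_len` and `output_dim` are assumed constructor attributes not shown in
# the original snippet.
def build(self, input_shape):
    self.embeddings = self.add_weight(
        name='pos_embeddings',
        shape=(self.max_len, self.output_dim),
        initializer='uniform',
        trainable=True)
    self.built = True  # equivalent to calling the parent class's build()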
def eye_like(C): return K.eye(K.shape(C)[0])
def build(self, mode, config): """Build Mask R-CNN architecture. input_shape: The shape of the input image. mode: Either "training" or "inference". The inputs and outputs of the model differ accordingly. """ assert mode in ['training', 'inference'] # Image size must be divisible by 2 multiple times h, w = config.IMAGE_SHAPE[:2] if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): raise Exception( "Image size must be divisible by 2 at least 6 times " "to avoid fractions when downscaling and upscaling. " "For example, use 256, 320, 384, 448, 512, ... etc. ") # Inputs input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(), name="input_image") input_image_meta = KL.Input(shape=[None], name="input_image_meta") if mode == "training": # RPN GT input_rpn_match = KL.Input(shape=[None, 1], name="input_rpn_match", dtype=tf.int32) input_rpn_bbox = KL.Input(shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) # Detection GT (class IDs, bounding boxes, and masks) # 1. GT Class IDs (zero padded) input_gt_class_ids = KL.Input(shape=[None], name="input_gt_class_ids", dtype=tf.int32) # 2. GT Boxes in pixels (zero padded) # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates input_gt_boxes = KL.Input(shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) # Normalize coordinates h, w = K.shape(input_image)[1], K.shape(input_image)[2] image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32) gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes) # 3. GT Masks (zero padded) # [batch, height, width, MAX_GT_INSTANCES] if config.USE_MINI_MASK: input_gt_masks = KL.Input(shape=[ config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None ], name="input_gt_masks", dtype=bool) else: input_gt_masks = KL.Input( shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], name="input_gt_masks", dtype=bool) # Build the shared convolutional layers. # Bottom-up Layers # Returns a list of the last layers of each stage, 5 in total. # Don't create the head (stage 5), so we pick the 4th item in the list. _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True) # Top-down Layers # TODO: add assert to verify feature map sizes match what's in tf_config P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5) P4 = KL.Add(name="fpn_p4add")([ KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4) ]) P3 = KL.Add(name="fpn_p3add")([ KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3) ]) P2 = KL.Add(name="fpn_p2add")([ KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2) ]) # Attach 3x3 conv to all P layers to get the final feature maps. P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2) P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3) P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4) P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5) # P6 is used for the 5th anchor scale in RPN. Generated by # subsampling from P5 with stride of 2. P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) # Note that P6 is used in RPN, but not in the classifier heads.
rpn_feature_maps = [P2, P3, P4, P5, P6] mrcnn_feature_maps = [P2, P3, P4, P5] # Generate Anchors self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, config.BACKBONE_SHAPES, config.BACKBONE_STRIDES, config.RPN_ANCHOR_STRIDE) # RPN Model rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, len(config.RPN_ANCHOR_RATIOS), 256) # Loop through pyramid layers layer_outputs = [] # list of lists for p in rpn_feature_maps: layer_outputs.append(rpn([p])) # Concatenate layer outputs # Convert from list of lists of level outputs to list of lists # of outputs across levels. # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] outputs = list(zip(*layer_outputs)) outputs = [ KL.Concatenate(axis=1, name=n)(list(o)) for o, n in zip(outputs, output_names) ] rpn_class_logits, rpn_class, rpn_bbox = outputs # Generate proposals # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates # and zero padded. proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ else config.POST_NMS_ROIS_INFERENCE rpn_rois = ProposalLayer(proposal_count=proposal_count, nms_threshold=config.RPN_NMS_THRESHOLD, name="ROI", anchors=self.anchors, config=config)([rpn_class, rpn_bbox]) if mode == "training": # Class ID mask to mark class IDs supported by the dataset the image # came from. _, _, _, active_class_ids = KL.Lambda( lambda x: parse_image_meta_graph(x), mask=[None, None, None, None])(input_image_meta) if not config.USE_RPN_ROIS: # Ignore predicted ROIs and use ROIs provided as an input. input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], name="input_roi", dtype=np.int32) # Normalize coordinates to 0-1 range. target_rois = KL.Lambda(lambda x: K.cast(x, tf.float32) / image_scale[:4])(input_rois) else: target_rois = rpn_rois # Generate detection targets # Subsamples proposals and generates target outputs for training # Note that proposal class IDs, gt_boxes, and gt_masks are zero # padded. Equally, returned rois and targets are zero padded.
rois, target_class_ids, target_bbox, target_mask =\ DetectionTargetLayer(config, name="proposal_targets")([ target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) # Network Heads # TODO: verify that this handles zero padded ROIs mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ fpn_classifier_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE, config.POOL_SIZE, config.NUM_CLASSES) mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE, config.MASK_POOL_SIZE, config.NUM_CLASSES) # TODO: clean up (use tf.identity if necessary) output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) # Losses rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( [input_rpn_match, rpn_class_logits]) rpn_bbox_loss = KL.Lambda( lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( [input_rpn_bbox, input_rpn_match, rpn_bbox]) class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")([ target_class_ids, mrcnn_class_logits, active_class_ids ]) bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")([ target_bbox, target_class_ids, mrcnn_bbox ]) mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")([ target_mask, target_class_ids, mrcnn_mask ]) # Model inputs = [ input_image, input_image_meta, input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks ] if not config.USE_RPN_ROIS: inputs.append(input_rois) outputs = [ rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois, rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss ] model = KM.Model(inputs, outputs, name='mask_rcnn') else: # Network Heads # Proposal classifier and BBox regressor heads mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, config.IMAGE_SHAPE, config.POOL_SIZE, config.NUM_CLASSES) # Detections # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates detections = DetectionLayer(config, name="mrcnn_detection")( [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) # Convert boxes to normalized coordinates # TODO: let DetectionLayer return normalized coordinates to avoid # unnecessary conversions h, w = config.IMAGE_SHAPE[:2] detection_boxes = KL.Lambda( lambda x: x[..., :4] / np.array([h, w, h, w]))(detections) # Create masks for detections mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, config.IMAGE_SHAPE, config.MASK_POOL_SIZE, config.NUM_CLASSES) model = KM.Model([input_image, input_image_meta], [ detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, rpn_class, rpn_bbox ], name='mask_rcnn') # Add multi-GPU support. if config.GPU_COUNT > 1: from parallel_model import ParallelModel model = ParallelModel(model, config.GPU_COUNT) return model
def sample_z(args): z_mu, z_sigma = args eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1])) return z_mu + K.exp(z_sigma / 2) * eps
def shape(self, x): return K.int_shape(x) if self.backend == 'tensorflow' else K.shape(x)
decoder = Sequential([ Dense(intermediate_dim, input_dim=latent_dim, activation='relu'), Dense(original_dim, activation='sigmoid') ]) x = Input(shape=(original_dim, )) h = Dense(intermediate_dim, activation='relu')(x) z_mu = Dense(latent_dim)(h) z_log_var = Dense(latent_dim)(h) z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var]) z_sigma = Lambda(lambda t: K.exp(.5 * t))(z_log_var) eps = Input(tensor=K.random_normal(stddev=epsilon_std, shape=(K.shape(x)[0], latent_dim))) z_eps = Multiply()([z_sigma, eps]) z = Add()([z_mu, z_eps]) x_pred = decoder(z) vae = Model(inputs=[x, eps], outputs=x_pred) vae.compile(optimizer='adam', loss=nll) vae.fit(X_train, X_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, X_test)) encoder = Model(x, z_mu) # 2D plot of the autoencoder's latent space z_test = encoder.predict(X_test, batch_size=batch_size) plt.figure(figsize=(6, 6))
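# The snippet above uses `nll` and `KLDivergenceLayer` without defining them; a minimal
# sketch consistent with the usual Gaussian-prior VAE formulation (an assumption about
# the missing pieces, not the original code) could look like this.
from keras.layers import Layer

def nll(y_true, y_pred):
    """Bernoulli negative log-likelihood, summed over pixels."""
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)

class KLDivergenceLayer(Layer):
    """Identity layer that adds the KL divergence to the model loss via add_loss."""
    def call(self, inputs):
        mu, log_var = inputs
        kl = -0.5 * K.sum(1 + log_var - K.square(mu) - K.exp(log_var), axis=-1)
        self.add_loss(K.mean(kl), inputs=inputs)
        return inputs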
def sampling(args): mu, log_var = args epsilon = K.random_normal(shape=K.shape(mu), mean=0, stddev=1.0) return mu + K.exp(log_var / 2) * epsilon
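# A hedged usage sketch: wrapping the sampling function above in a Lambda layer;
# `mu`, `log_var` and `latent_dim` are assumed to come from the surrounding encoder.
from keras.layers import Lambda

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([mu, log_var])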