def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(
        x, (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
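
# A minimal shape-bookkeeping sketch for make_patches_grid above (hypothetical
# sizes, plain Python only): a 3x64x64 input with patch_size=3, patch_stride=1
# yields a 62x62 grid of 3x3x3 patches.
def _patch_grid_shape(channels, rows, cols, patch_size, patch_stride):
    num_rows = 1 + (rows - patch_size) // patch_stride
    num_cols = 1 + (cols - patch_size) // patch_stride
    # matches the documented output shape (rows, cols, channels, patch_rows, patch_cols)
    return (num_rows, num_cols, channels, patch_size, patch_size)

assert _patch_grid_shape(3, 64, 64, 3, 1) == (62, 62, 3, 3, 3)
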
 def call(self, x):
     # If only Q_seq, K_seq, V_seq are passed in, no mask is applied.
     # If Q_seq, K_seq, V_seq, Q_len, V_len are all passed in, the padded positions are masked.
     if len(x) == 3:
         Q_seq,K_seq,V_seq = x
         Q_len,V_len = None,None
     elif len(x) == 5:
         Q_seq,K_seq,V_seq,Q_len,V_len = x
     # Apply linear transformations to Q, K and V.
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     Q_seq = K.permute_dimensions(Q_seq, (0,2,1,3))
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0,2,1,3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0,2,1,3))
     # Compute the dot products, then mask, then softmax.
     A = K.batch_dot(Q_seq, K_seq, axes=[3,3])
     A = K.permute_dimensions(A, (0,3,2,1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0,3,2,1))    
     A = K.softmax(A)
     # Compute the output and mask it.
     O_seq = K.batch_dot(A, V_seq, axes=[3,2])
     O_seq = K.permute_dimensions(O_seq, (0,2,1,3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     return O_seq
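
# Shape walkthrough for the reshapes in call() above (hypothetical sizes, plain
# Python): with nb_head=8 and size_per_head=16, a (batch, seq_len, 128)
# projection is split per head into (batch, nb_head, seq_len, size_per_head).
batch, seq_len, nb_head, size_per_head = 2, 10, 8, 16
after_dot = (batch, seq_len, nb_head * size_per_head)        # K.dot(Q_seq, self.WQ)
after_reshape = (batch, seq_len, nb_head, size_per_head)     # K.reshape
after_permute = (batch, nb_head, seq_len, size_per_head)     # K.permute_dimensions (0,2,1,3)
print(after_dot, after_reshape, after_permute)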
Example #3
    def get_output(self, train=False):
        def format_shape(shape):
            if K._BACKEND == 'tensorflow':
                def trf(x):
                    try:
                        return int(x)
                    except TypeError:
                        return x

                return list(map(trf, shape))
            return shape

        X = self.get_input(train)

        in_shape = format_shape(K.shape(X))
        batch_flatten_len = K.prod(in_shape[:2])
        cast_in_shape = (batch_flatten_len, ) + tuple(in_shape[i] for i in range(2, K.ndim(X)))
        
        pre_outs = self.layer(K.reshape(X, cast_in_shape))
        
        out_shape = format_shape(K.shape(pre_outs))
        cast_out_shape = (in_shape[0], in_shape[1]) + tuple(out_shape[i] for i in range(1, K.ndim(pre_outs)))
        
        outputs = K.reshape(pre_outs, cast_out_shape)
        return outputs
    def __call__(self, loss):
        from . import patches

        output = self.layer.get_output(True)
        assert K.ndim(output) == 4
        batch_size = K.shape(output)[0] // 2
        patch_size = self.patch_size
        patch_stride = 1
        generated = output[:batch_size, :, :, :]
        content = output[batch_size:, :, :, :]
        # extract patches from feature maps
        generated_patches, generated_patches_norm = \
            patches.make_patches(generated, patch_size, patch_stride)
        content_patches, content_patches_norm = \
            patches.make_patches(content, patch_size, patch_stride)
        a_patches, a_patches_norm = \
            patches.make_patches(K.variable(self.features_a), patch_size, patch_stride)
        ap_patches, ap_patches_norm = \
            patches.make_patches(K.variable(self.features_ap), patch_size, patch_stride)
        # find best patches and calculate loss
        patch_ids = patches.find_patch_matches(
            content_patches, content_patches_norm,
            a_patches / a_patches_norm)
        best_analogy_patches = K.reshape(
            ap_patches[patch_ids], K.shape(generated_patches))
        loss += self.weight * K.sum(K.square(best_analogy_patches - generated_patches)) / patch_size ** 2
        return loss
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
         
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     # Applying the attention
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
     position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0)
     position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0)
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x'))
     FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x'))
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #6
 def call(self, position):
     inputDim = K.ndim(position)
     positionShape = K.shape(position)
     targetDim = positionShape[-1]
     position = K.reshape(position, (-1, targetDim))
     samples = K.shape(position)[0]
     theta = THT.zeros((samples, 3, 3))
     
     chw = self.toChw(position)
     chw = K.reshape(chw, (samples, targetDim))
     dx = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,)) 
     dy = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
     cX = chw[:, 0] + dx
     cY = chw[:, 1] + dy
     h = K.maximum(chw[:, 2] * (1.0 + self.context), self.minSide)
     w = K.maximum(chw[:, 3] * (1.0 + self.context), self.minSide)
     
     # Calculating the parameters of the transformation
     tx = cX
     ty = cY
     sx = w / 2.0 # Scale x
     sy = h / 2.0 # Scale y
     
     # Setting transformation
     theta = THT.set_subtensor(theta[:, 0, 0], sx)
     theta = THT.set_subtensor(theta[:, 1, 1], sy)
     theta = THT.set_subtensor(theta[:, 0, 2], tx)
     theta = THT.set_subtensor(theta[:, 1, 2], ty)
     theta = THT.set_subtensor(theta[:, 2, 2], 1.0)
     
     thetaShape = K.concatenate([positionShape[:-1], K.shape(theta)[-2:]])
     theta = THT.reshape(theta, thetaShape, ndim=inputDim + 1)
     
     return theta
Example #7
    def call(self, inputs, **kwargs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of the capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to get standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        print(self.routings)
        for i in range(self.routings):
            c = K.softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)
        return o
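
# A NumPy sketch of the routing loop above (not the layer's exact K.batch_dot
# semantics; assumes the usual squash activation and hat_inputs of shape
# (batch, num_capsule, input_num_capsule, dim_capsule)).
import numpy as np

def _softmax(x, axis):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def _squash(x, axis=-1):
    n2 = (x ** 2).sum(axis=axis, keepdims=True)
    return (n2 / (1.0 + n2)) * x / np.sqrt(n2 + 1e-7)

def _route(hat_inputs, routings=3):
    b = np.zeros(hat_inputs.shape[:3])                       # coupling logits
    for i in range(routings):
        c = _softmax(b, axis=1)                              # coupling coefficients
        o = _squash(np.einsum('bij,bijd->bid', c, hat_inputs))
        if i < routings - 1:
            b = np.einsum('bid,bijd->bij', o, hat_inputs)    # agreement, b = <u, v>
    return o

print(_route(np.random.randn(2, 10, 6, 16)).shape)           # (2, 10, 16)
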
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("GaussianAttention must be called on a list of two tensors. Got: " + str(X))
     
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     cx = (position[:, 0] + position[:, 2]) / 2.0
     cy = (position[:, 1] + position[:, 3]) / 2.0
     sx = (position[:, 2] - cx) * 0.60
     sy = (position[:, 3] - cy) * 0.60
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = K.exp(-(rX - cx.dimshuffle(0, 'x')) ** 2 / (2.0 * (sx.dimshuffle(0, 'x') ** 2 + self.epsilon)))
     FY = K.exp(-(rY - cy.dimshuffle(0, 'x')) ** 2 / (2.0 * (sy.dimshuffle(0, 'x') ** 2 + self.epsilon)))
     m = (FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1))
     m = m + self.alpha
     m = m - K.greater(m, 1.0) * (m - 1.0)
     
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
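
# A NumPy sketch of the separable Gaussian attention mask built above
# (hypothetical box centred at the origin, toy frame size):
import numpy as np
height = width = 8
cx, cy, sx, sy, eps = 0.0, 0.0, 0.3, 0.3, 1e-6
rX = np.linspace(-1.0, 1.0, width)
rY = np.linspace(-1.0, 1.0, height)
FX = np.exp(-(rX - cx) ** 2 / (2.0 * (sx ** 2 + eps)))
FY = np.exp(-(rY - cy) ** 2 / (2.0 * (sy ** 2 + eps)))
mask = FY[:, None] * FX[None, :]          # outer product -> (height, width)
print(mask.shape, round(float(mask.max()), 3))
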
    def get_split_averages(input_tensor, input_mask, indices):
        # Splits input tensor into three parts based on the indices and
        # returns average of values prior to index, values at the index and
        # average of values after the index.
        # input_tensor: (batch_size, input_length, input_dim)
        # input_mask: (batch_size, input_length)
        # indices: (batch_size, 1)
        # (1, input_length)
        length_range = K.expand_dims(K.arange(K.shape(input_tensor)[1]), dim=0)
        # (batch_size, input_length)
        batched_range = K.repeat_elements(length_range, K.shape(input_tensor)[0], 0)
        tiled_indices = K.repeat_elements(indices, K.shape(input_tensor)[1], 1)  # (batch_size, input_length)
        greater_mask = K.greater(batched_range, tiled_indices)  # (batch_size, input_length)
        lesser_mask = K.lesser(batched_range, tiled_indices)  # (batch_size, input_length)
        equal_mask = K.equal(batched_range, tiled_indices)  # (batch_size, input_length)

        # We also need to mask these masks using the input mask.
        # (batch_size, input_length)
        if input_mask is not None:
            greater_mask = switch(input_mask, greater_mask, K.zeros_like(greater_mask))
            lesser_mask = switch(input_mask, lesser_mask, K.zeros_like(lesser_mask))

        post_sum = K.sum(switch(K.expand_dims(greater_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        pre_sum = K.sum(switch(K.expand_dims(lesser_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)
        values_at_indices = K.sum(switch(K.expand_dims(equal_mask), input_tensor, K.zeros_like(input_tensor)), axis=1)  # (batch_size, input_dim)

        post_normalizer = K.expand_dims(K.sum(greater_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)
        pre_normalizer = K.expand_dims(K.sum(lesser_mask, axis=1) + K.epsilon(), dim=1)  # (batch_size, 1)

        return K.cast(pre_sum / pre_normalizer, 'float32'), values_at_indices, K.cast(post_sum / post_normalizer, 'float32')
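
# A NumPy sketch of the same split-averages idea (toy, unmasked input):
import numpy as np
x = np.arange(12, dtype=float).reshape(1, 4, 3)   # (batch=1, input_length=4, input_dim=3)
idx = 2
pre_avg = x[0, :idx].mean(axis=0)                 # average of values before the index
at_idx = x[0, idx]                                # values at the index
post_avg = x[0, idx + 1:].mean(axis=0)            # average of values after the index
print(pre_avg, at_idx, post_avg)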
Example #10
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None, activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
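
# A NumPy sketch of the collapse-and-reshape trick used above (hypothetical shapes):
import numpy as np
batch, timesteps, input_dim, output_dim = 2, 5, 3, 4
x = np.random.randn(batch, timesteps, input_dim)
w = np.random.randn(input_dim, output_dim)
b = np.random.randn(output_dim)
y = (x.reshape(-1, input_dim) @ w + b).reshape(batch, timesteps, output_dim)
print(y.shape)  # (2, 5, 4)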
Example #11
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
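
# A NumPy sketch of the ignore-mask rule above (toy numbers): a prediction only
# contributes to the no-object confidence loss when its best IoU against every
# ground-truth box stays below ignore_thresh.
import numpy as np
ignore_thresh = 0.5
best_iou = np.array([0.1, 0.45, 0.8])                  # hypothetical best IoU per prediction
ignore_mask = (best_iou < ignore_thresh).astype(np.float32)
print(ignore_mask)  # [1. 1. 0.]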
Example #12
 def __call__(self, loss):
     output = self.layer.get_output(True)
     batch_size = K.shape(output)[0] // 2
     generated = output[:batch_size, :, :, :]
     loss += self.weight * K.mean(
         K.sum(K.square(gram_matrix(self.target) - gram_matrix(generated)), axis=(1,2))
     ) / (4.0 * K.square(K.prod(K.shape(generated)[1:])))
     return loss
Example #13
def buildMixModel(img_channels=3, lr = 0.01,weight_decay = 1e-7, loss='mse',activ='relu', last_activ='sigmoid'):
    # just build a tiny fcn model, you can use more layers and more filters as you want

    main_input = Input(shape=(img_channels, None, None), name='input')
    conv_1 = Convolution2D(4,3,3, border_mode = 'same', activation= activ, init='orthogonal',name='conv_1',
                                 W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(main_input)
    
    max_1 = MaxPooling2D(pool_size = (2,2))(conv_1)

    conv_2 = Convolution2D(8,3,3, border_mode = 'same', activation=activ, init='orthogonal',name='conv_2',
                                 W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(max_1)
    max_2 = MaxPooling2D(pool_size = (2,2))(conv_2)
    dp_0 =  Dropout(0.25)(max_2) 
    conv_3 = Convolution2D(16,3,3, border_mode = 'same', activation= activ, init='orthogonal',name='conv_3',
                                 W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(dp_0)  # 25
    max_3 = MaxPooling2D(pool_size = (2,2))(conv_3)                                                      # 12

    conv_4 = Convolution2D(32,3,3, border_mode = 'same', activation=activ, init='orthogonal',name='conv_4',
                                 W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(max_3)  # 12
    max_4 = MaxPooling2D(pool_size = (2,2))(conv_4)                                                      # 12
    dp_1 =  Dropout(0.25)(max_4)
    conv_5 = Convolution2D(64,3,3, border_mode = 'same', activation=activ, init='orthogonal',name='conv_5',
                                 W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(dp_1)  # 6

    upsamp_0 = UpSampling2D((2,2))(conv_5)
    resize_0 = Resize2D(K.shape(conv_4))(upsamp_0)
    deconv_0 = Convolution2D(32,3,3, border_mode = 'same', activation=activ, init='orthogonal',name='deconv_0',
                                   W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(resize_0)
    dp_2 =  Dropout(0.25)(deconv_0)
    upsamp_1 = UpSampling2D((2,2))(dp_2)
    resize_1 = Resize2D(K.shape(conv_3))(upsamp_1)
    deconv_1 = Convolution2D(16,3,3, border_mode = 'same', activation=activ, init='orthogonal',name='deconv_1',
                                   W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(resize_1)

    upsamp_2 = UpSampling2D((2,2))(deconv_1)
    resize_2 = Resize2D(K.shape(conv_2))(upsamp_2)
    deconv_2 = Convolution2D(8,3,3, border_mode = 'same', activation=activ,init='orthogonal',name='deconv_2',
                                   W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(resize_2)

    dp_3 =  Dropout(0.25)(deconv_2)
    upsamp_3 = UpSampling2D((2,2))(dp_3)
    resize_3 = Resize2D(K.shape(conv_1))(upsamp_3)
    deconv_3 = Convolution2D(4,3,3, border_mode = 'same', activation=activ,init='orthogonal',name='deconv_3',
                                   W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(resize_3)


    last_conv = Convolution2D(1,3,3, border_mode = 'same', activation=last_activ,init='orthogonal', name= 'output_mask',
                                       W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(deconv_3)

    model = Model(input=[main_input], output=[last_conv])
    #opt = SGD(lr=lr, decay= 1e-6, momentum=0.9,nesterov=True)
    #opt = Adadelta(lr=lr, rho=0.95, epsilon=1e-06,clipvalue=10)
    opt = adam(lr=lr)
    model.compile(loss={'output_mask': loss }, optimizer=opt)
    return model
Example #14
def Skew(inputs):
    inputs_ = K.permute_dimensions(inputs, (3,0,1,2))
    buffer_ = T.zeros((K.shape(inputs)[3], K.shape(inputs)[0], K.shape(inputs)[1]+K.shape(inputs)[3]-1, K.shape(inputs)[2]))

    def fnc(buf, inp, i):
        return T.set_subtensor(buf[:, i:i+K.shape(inputs)[1], :], inp[:,:,:])

    res, _ = theano.scan(fn=fnc, sequences=[buffer_, inputs_, T.arange(K.shape(inputs)[3])])
    res = K.permute_dimensions(res, (1,2,3,0))

    return res
def make_patches(x, patch_size, patch_stride):
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
        (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches, (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1], patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.l2_normalize(patches, 1)
    return patches, patches_norm
Example #16
 def _transform(self, X, affine_transformation, output_size):
     batch_size, num_channels = K.shape(X)[0], K.shape(X)[3]
     transformations = K.reshape(affine_transformation,
                                 shape=(batch_size, 2, 3))
     # transformations = K.cast(affine_transformation[:, 0:2, :], 'float32')
     regular_grids = self._make_regular_grids(batch_size, *output_size)
     sampled_grids = K.batch_dot(transformations, regular_grids)
     interpolated_image = self._interpolate(X, sampled_grids, output_size)
     new_shape = (batch_size, output_size[0], output_size[1], num_channels)
     interpolated_image = K.reshape(interpolated_image, new_shape)
     return interpolated_image
def make_patches(x, shape):
    x = K.expand_dims(x, 0)

    patches = images2neibs(x, (shape, shape))
    patches = K.reshape(patches, (K.shape(x)[1],
                                  K.shape(patches)[0] // K.shape(x)[1],
                                  shape, shape))
    
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1,2,3),
                                keepdims=True))

    return patches, patches_norm
def make_patches(x, patch_size, patch_stride):
    '''Break image `x` up into a bunch of patches.'''
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
        (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches, (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1], patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1,2,3), keepdims=True))
    return patches, patches_norm
 def call(self, x, mask=None):
     if (self.size is None) or (self.mode == 'sum'):
         self.size = int(x.shape[-1])
     batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
     position_j = 1. / K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
     position_j = K.expand_dims(position_j, 0)
     position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1  # K.arange does not support variable length, so generate positions this way
     position_i = K.expand_dims(position_i, 2)
     position_ij = K.dot(position_i, position_j)
     position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
     if self.mode == 'sum':
         return position_ij + x
     elif self.mode == 'concat':
         return K.concatenate([position_ij, x], 2)
    def call(self, X, mask=None):
        # 1D -> 2D
        batch = K.shape(X)[0]
        width = deconv_output_length(K.shape(X)[1],
                                    self.filter_length,
                                    self.padding,
                                    self.strides[2])

        print("Output width: ", width)

        print("Input shape: ", K.shape(X))
        X = K.expand_dims(X,2)
        print("Input shape after expand: ", K.shape(X))
        # X = K.permute_dimensions(X, (0, 2, 3, 1))
        X = K.permute_dimensions(X, (0, 2, 1, 3))
        print("Input shape after permute: ", K.shape(X))
        deconv_shape = tf.pack([batch, 1, width, self.nb_filter])  # tf.pack was renamed to tf.stack in TF >= 1.0
        print("Deconv shape: ", deconv_shape)
        conv_out = tf.nn.conv2d_transpose(X, self.W, strides=self.strides,
                                          padding=self.padding.upper(),
                                          output_shape=deconv_shape)

        output = conv_out + K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
        print("Output shape: ", K.shape(output))
        # output =  K.permute_dimensions(output, (0, 3, 1, 2))
        output =  K.permute_dimensions(output, (0, 2, 1, 3))
        print("Output shape after permute: ", K.shape(output))
        # 2D -> 1D
        output = K.squeeze(output,2)
        print("Output shape after squeeze: ", K.shape(output))
        return output
Example #21
 def loss(y_true, y_pred):
     from plasma.conf import conf
     fac = MaxHingeTarget.fac
     #overall_fac = np.prod(np.array(K.shape(y_pred)[1:]).astype(np.float32))
     overall_fac = K.prod(K.cast(K.shape(y_pred)[1:],K.floatx()))
     max_val = K.max(y_pred,axis=-2) #temporal axis!
     max_val1 = K.repeat(max_val,K.shape(y_pred)[-2])
     mask = K.cast(K.equal(max_val1,y_pred),K.floatx())
     y_pred1 = mask * y_pred + (1-mask) * y_true
     weight_mask = K.mean(y_true,axis=-1)
     weight_mask = K.cast(K.greater(weight_mask,0.0),K.floatx()) #positive label!
     weight_mask = fac*weight_mask + (1 - weight_mask)
     #return weight_mask*squared_hinge(y_true,y_pred1)
     return conf['model']['loss_scale_factor']*overall_fac*weight_mask*hinge(y_true,y_pred1)
Example #22
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
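
# A NumPy sketch of the same reparameterization trick (toy values): drawing
# z ~ N(mu, sigma^2) as mu + exp(0.5*log_var) * eps keeps the sample a
# differentiable function of z_mean and z_log_var.
import numpy as np
z_mean = np.array([[0.0, 1.0]])
z_log_var = np.array([[0.0, np.log(4.0)]])        # variances 1 and 4
epsilon = np.random.randn(*z_mean.shape)
z = z_mean + np.exp(0.5 * z_log_var) * epsilon    # standard deviations 1 and 2
print(z.shape)  # (1, 2)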
Example #23
    def call(self, inputs, **kwargs):
        assert isinstance(inputs, list) and len(inputs) == 3
        first, second, features = inputs[0], inputs[1], inputs[2]
        if not self.from_logits:
            first = kb.clip(first, 1e-10, 1.0)
            second = kb.clip(second, 1e-10, 1.0)
            first_, second_ = kb.log(first), kb.log(second)
        else:
            first_, second_ = first, second
        # embedded_features.shape = (M, T, 1)
        if self.use_intermediate_layer:
            features = kb.dot(features, self.first_kernel)
            features = kb.bias_add(features, self.first_bias, data_format="channels_last")
            features = self.intermediate_activation(features)
        embedded_features = kb.dot(features, self.features_kernel)
        embedded_features = kb.bias_add(
            embedded_features, self.features_bias, data_format="channels_last")
        if self.use_dimension_bias:
            tiling_shape = [1] * (kb.ndim(first)-1) + [kb.shape(first)[-1]]
            embedded_features = kb.tile(embedded_features, tiling_shape)
            embedded_features = kb.bias_add(
                embedded_features, self.dimensions_bias, data_format="channels_last")
        sigma = kb.sigmoid(embedded_features)

        result = weighted_sum(first_, second_, sigma,
                              self.first_threshold, self.second_threshold)
        probs = kb.softmax(result)
        if self.return_logits:
            return [probs, result]
        return probs
    def call(self, x, mask=None):

        input_shape = K.shape(x)

        if self.dim_ordering == 'th':
            num_rows = input_shape[2]
            num_cols = input_shape[3]
        elif self.dim_ordering == 'tf':
            num_rows = input_shape[1]
            num_cols = input_shape[2]

        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        if self.dim_ordering == 'th':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for ix in range(num_pool_regions):
                    for jy in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]
                        x_crop = x[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

        elif self.dim_ordering == 'tf':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for ix in range(num_pool_regions):
                    for jy in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], y2 - y1,
                                     x2 - x1, input_shape[3]]
                        x_crop = x[:, y1:y2, x1:x2, :]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(1, 2))
                        outputs.append(pooled_val)

        outputs = K.concatenate(outputs)
        return outputs
def _loss_tensor(y_true, y_pred):
    max_val = K.max(y_pred,axis=-2) #temporal axis!
    max_val = K.repeat(max_val,K.shape(y_pred)[-2])
    print(K.eval(max_val))
    mask = K.cast(K.equal(max_val,y_pred),K.floatx())
    y_pred = mask * y_pred + (1-mask) * y_true
    return squared_hinge(y_true,y_pred)
 def call(self, X, mask=None):
    
     input_shape = self.input_spec[0].shape
     
     x = K.reshape(X[0], (-1, input_shape[2]))
     target = X[1].flatten() if self.trainable else None
     
     Y = h_softmax(x, K.shape(x)[0], self.output_dim, 
                           self.n_classes, self.n_outputs_per_class,
                           self.W1, self.b1, self.W2, self.b2, target)
     
     output_dim = 1 if self.trainable else self.output_dim    
     input_length = K.shape(X[0])[1]
    
     y = K.reshape(Y, (-1, input_length, output_dim))
     return y
Example #27
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
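
# A NumPy sketch of the box decoding above for a single grid cell and anchor
# (all numbers hypothetical):
import numpy as np
sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
grid_xy = np.array([3.0, 5.0])            # cell column, row
raw_xy = np.array([0.2, -0.1])            # network outputs for x, y
raw_wh = np.array([0.5, 0.3])             # network outputs for w, h
anchor = np.array([116.0, 90.0])          # anchor width, height in pixels
grid_shape = np.array([13.0, 13.0])       # grid width, height
input_shape = np.array([416.0, 416.0])    # input width, height
box_xy = (sigmoid(raw_xy) + grid_xy) / grid_shape   # normalized box centre
box_wh = np.exp(raw_wh) * anchor / input_shape      # normalized box size
print(box_xy, box_wh)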
Example #28
    def call(self, x, mask=None):
        if hasattr(x, '_keras_shape'):
            input_shape = x._keras_shape
        else:
            input_shape = self._input_shape
        #import pdb
        #pdb.set_trace()
        #if self.last_two is not None:
        #    last2 = self.last_two
        #else:
        #    input_shape = x._keras_shape
        #    last2 = input_shape[-2:]
        #out_shape = K.shape(x)[:-2]

        x = K.reshape(x, (-1,) + input_shape[-2:]) # (batch * d1 * ... * dn-2, dn-1, dn)
        if mask is not None:
            mask_shape = (K.shape(x)[0], -1)
            mask = K.reshape(mask, mask_shape) # give it the same first dim
        y = self.layer.call(x, mask)
        #try:
        #output_shape = self.get_output_shape_for(K.shape(x))
        #except:
        output_shape =  self.get_output_shape_for(input_shape)
        #import pdb
        #pdb.set_trace()
        return K.cast(K.reshape(y, output_shape), K.floatx()) 
Example #29
def image_categorical_crossentropy(output, target, from_logits=False):
    output = T.clip(output, _EPSILON, 1.0 - _EPSILON)
    output_ = K.reshape(output, (-1, 256))
    target_ = K.reshape(target, (-1, 256))
    out = T.nnet.categorical_crossentropy(output_, target_)
    out = K.reshape(out,(K.shape(output)[0],-1))
    return T.mean(T.mean(out, axis=1))
Example #30
def batch_gather(reference, indices):
    """
    C+P From Keras pull request https://github.com/keras-team/keras/pull/6377/files
    
    Batchwise gathering of row indices.

    The numpy equivalent is `reference[np.arange(batch_size), indices]`, where
    `batch_size` is the first dimension of the reference tensor.

    # Arguments
        reference: A tensor with ndim >= 2 of shape.
          (batch_size, dim1, dim2, ..., dimN)
        indices: A 1d integer tensor of shape (batch_size) satisfying
          0 <= i < dim2 for each element i.

    # Returns
        The selected tensor with shape (batch_size, dim2, ..., dimN).

    # Examples
        1. If reference is `[[3, 5, 7], [11, 13, 17]]` and indices is `[2, 1]`
        then the result is `[7, 13]`.

        2. If reference is
        ```
          [[[2, 3], [4, 5], [6, 7]],
           [[10, 11], [12, 13], [16, 17]]]
        ```
        and indices is `[2, 1]` then the result is `[[6, 7], [12, 13]]`.
    """
    batch_size = K.shape(reference)[0]
    indices = tf.stack([tf.range(batch_size), indices], axis=1)
    return tf.gather_nd(reference, indices)
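
# A quick NumPy check of the documented equivalent, using example 1 from the
# docstring above:
import numpy as np
reference = np.array([[3, 5, 7], [11, 13, 17]])
indices = np.array([2, 1])
print(reference[np.arange(reference.shape[0]), indices])  # [ 7 13]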
Example #31
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,
                              stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon
    def call(self, x, mask=None):
        # We assume the tuples are SVO and each slot is represented as vector.
        # Moreover, we assume each answer option is encoded as a single vector.
        # knowledge_embedding: (batch_size, num_tuples, tuple_size, embed_dim)
        # question_embedding: (batch_size, question_length, embed_dim)
        # answer_embedding: (batch_size, num_options, embed_dim)
        knowledge_embedding, question_embedding, answer_embedding = x
        if mask is None:
            knowledge_mask = question_mask = answer_mask = None
        else:
            knowledge_mask, question_mask, answer_mask = mask
        if knowledge_mask is None:
            sv_knowledge_mask = vo_knowledge_mask = subj_knowledge_mask = obj_knowledge_mask = None
        else:
            # Take out the relevant parts for each part of the tuple and reshape SV and VO masks using
            # batch_flatten.
            # (batch_size, num_tuples*2)
            sv_knowledge_mask = K.batch_flatten(knowledge_mask[:, :, :2])
            # (batch_size, num_tuples*2)
            vo_knowledge_mask = K.batch_flatten(knowledge_mask[:, :, 1:])
            # (batch_size, num_tuples)
            subj_knowledge_mask = knowledge_mask[:, :, 0]
            # (batch_size, num_tuples)
            obj_knowledge_mask = knowledge_mask[:, :, 2]
        batch_size = K.shape(knowledge_embedding)[0]
        sv_knowledge = K.reshape(
            knowledge_embedding[:, :, :2, :],
            (batch_size, self.num_tuples * 2, self.input_dim))
        vo_knowledge = K.reshape(
            knowledge_embedding[:, :, 1:, :],
            (batch_size, self.num_tuples * 2, self.input_dim))
        # (batch_size, num_tuples, embed_dim)
        subj_knowledge = knowledge_embedding[:, :, 0, :]
        # (batch_size, num_tuples, embed_dim)
        obj_knowledge = knowledge_embedding[:, :, 2, :]

        ## Step A1: Align SV with question.
        # Source is question, target is SV knowledge
        # (batch_size, question_length, num_tuples*2)
        sv_question_knowledge_alignment = self._align(
            question_embedding,
            sv_knowledge,
            question_mask,
            sv_knowledge_mask,
            normalize_alignment=False)
        # Sum probabilities over S and V slots. This is still a valid probability distribution.
        # (batch_size, question_length, num_tuples)
        sv_question_tuple_weights = K.sum(K.reshape(
            sv_question_knowledge_alignment,
            (batch_size, self.question_length, self.num_tuples, 2)),
                                          axis=-1)
        # Average over question length. This is essentially the weights of tuples based on how well their
        # S and V slots align to any word in the question.
        # Insight: This is essentially \sum_{i} p_align(tuple | q_word_i) * p_imp(q_word_i), where q_word_i is
        # the ith word in the question, p_align is the alignment weight and p_imp is the importance of the
        # question word, and p_imp is uniform.
        # (batch_size, num_tuples)
        sv_tuple_weights = K.mean(sv_question_tuple_weights, axis=1)

        ## Step A2: Align answer with Obj.
        # Source is obj knowledge, target is answer
        # (batch_size, num_tuples, num_options)
        obj_knowledge_answer_alignment = self._align(obj_knowledge,
                                                     answer_embedding,
                                                     obj_knowledge_mask,
                                                     answer_mask,
                                                     normalize_alignment=False)
        # (batch_size, num_tuples, num_options)
        tiled_sv_tuple_weights = K.dot(K.expand_dims(sv_tuple_weights),
                                       K.ones((1, self.num_options)))
        # Now we compute a weighted average over the tuples dimension, with the weights coming from how well
        # the tuples align with the question.
        # (batch_size, num_options)
        obj_answer_weights = K.sum(tiled_sv_tuple_weights *
                                   obj_knowledge_answer_alignment,
                                   axis=1)

        # Following steps are similar to what we did so far. Just substitute VO for SV and S for O.
        ## Step B1: Align VO with question
        vo_question_knowledge_alignment = self._align(
            question_embedding,
            vo_knowledge,
            question_mask,
            vo_knowledge_mask,
            normalize_alignment=False)
        vo_question_tuple_weights = K.sum(K.reshape(
            vo_question_knowledge_alignment,
            (batch_size, self.question_length, self.num_tuples, 2)),
                                          axis=-1)
        vo_tuple_weights = K.mean(vo_question_tuple_weights, axis=1)

        ## Step B2: Align answer with Subj
        subj_knowledge_answer_alignment = self._align(
            subj_knowledge,
            answer_embedding,
            subj_knowledge_mask,
            answer_mask,
            normalize_alignment=False)
        tiled_vo_tuple_weights = K.dot(K.expand_dims(vo_tuple_weights),
                                       K.ones((1, self.num_options)))
        subj_answer_weights = K.sum(tiled_vo_tuple_weights *
                                    subj_knowledge_answer_alignment,
                                    axis=1)

        # We now select the element wise max of obj_answer_weights and subj_answer_weights as our final weights.
        # (batch_size, num_options)
        max_answer_weights = switch(
            K.greater(obj_answer_weights, subj_answer_weights),
            obj_answer_weights, subj_answer_weights)
        # Renormalizing max weights.
        return K.softmax(max_answer_weights)
Example #33
    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`:(E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Args:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available for
        "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer with the shape beyond the
      second dimension being the value of the input mask shape(if the computed
      output mask is none), an output mask with the shape beyond the first
      dimension being the value of the mask shape(if mask is not None) or
      output mask with the shape beyond the first dimension being the
      value of the computed output shape.

    """
        # cases need to call the layer.compute_mask when input_mask is None:
        # Masking layer and Embedding layer with mask_zero
        input_shape = tf.nest.map_structure(
            lambda x: tf.TensorShape(K.int_shape(x)), inputs)
        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
        batch_size = tf_utils.convert_shapes(input_shape)
        batch_size = tf.nest.flatten(batch_size)[0]
        is_ragged_input = tf.nest.map_structure(
            lambda x: isinstance(x, tf.RaggedTensor), inputs)
        is_ragged_input = generic_utils.to_list(
            tf.nest.flatten(is_ragged_input))
        if batch_size and not self._always_use_reshape or any(is_ragged_input):
            # batch size matters, we currently do not handle mask explicitly, or if
            # the layer always uses reshape approach, or the input is a ragged tensor.
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
            inner_mask = K.reshape(inner_mask, inner_mask_shape)
        inner_input_shape = tf.nest.map_structure(
            lambda tensor: self._get_shape_tuple((-1, ), tensor, 2), inputs)
        inner_inputs = tf.__internal__.nest.map_structure_up_to(
            inputs, tf.reshape, inputs, inner_input_shape)
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(K.int_shape(mask))):
                output_mask = K.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = tf_utils.convert_shapes(input_shape)
            input_length = tf.nest.flatten(input_length)[1]
            if not input_length:
                input_length = tf.nest.map_structure(lambda x: K.shape(x)[1],
                                                     inputs)
                input_length = tf.nest.flatten(input_length)[0]
            output_mask_int_shape = K.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = K.int_shape(mask)
                else:
                    input_shape = generic_utils.to_list(
                        tf.nest.flatten(input_shape))[0]
                    output_mask_int_shape = K.compute_output_shape(
                        input_shape)[:-1]
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = K.reshape(output_mask, output_mask_shape)
        return output_mask
Example #34
def repeat_output(input):
    shape = K.shape(input)
    return K.reshape(K.repeat(input, 4 * 4), (shape[0], 4, 4, 256))
def complex_standardization(input_centred,
                            Vrr,
                            Vii,
                            Vri,
                            layernorm=False,
                            axis=-1):

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
    variances_broadcast = [1] * ndim
    variances_broadcast[axis] = input_dim
    if layernorm:
        variances_broadcast[0] = K.shape(input_centred)[0]

    # We require the covariance matrix's inverse square root. That first requires
    # square rooting, followed by inversion (I do this in that order because during
    # the computation of square root we compute the determinant we'll need for
    # inversion as well).

    # tau = Vrr + Vii = Trace. Guaranteed >= 0 because SPD
    tau = Vrr + Vii
    # delta = (Vrr * Vii) - (Vri ** 2) = Determinant. Guaranteed >= 0 because SPD
    delta = (Vrr * Vii) - (Vri**2)

    # s = np.sqrt(delta) # Determinant of square root matrix
    # t = np.sqrt(tau + 2 * s)
    s = K.sqrt(delta)  # Determinant of square root matrix
    t = K.sqrt(tau + 2 * s)

    # The square root matrix could now be explicitly formed as
    #       [ Vrr+s Vri   ]
    # (1/t) [ Vir   Vii+s ]
    # https://en.wikipedia.org/wiki/Square_root_of_a_2_by_2_matrix
    # but we don't need to do this immediately since we can also simultaneously
    # invert. We can do this because we've already computed the determinant of
    # the square root matrix, and can thus invert it using the analytical
    # solution for 2x2 matrices
    #      [ A B ]             [  D  -B ]
    # inv( [ C D ] ) = (1/det) [ -C   A ]
    # http://mathworld.wolfram.com/MatrixInverse.html
    # Thus giving us
    #           [  Vii+s  -Vri   ]
    # (1/s)(1/t)[ -Vir     Vrr+s ]
    # So we proceed as follows:

    inverse_st = 1.0 / (s * t)
    Wrr = (Vii + s) * inverse_st
    Wii = (Vrr + s) * inverse_st
    Wri = -Vri * inverse_st

    # And we have computed the inverse square root matrix W = sqrt(V)!
    # Normalization. We multiply, x_normalized = W.x.

    # The returned result will be a complex standardized input
    # where the real and imaginary parts are obtained as follows:
    # x_real_normed = Wrr * x_real_centred + Wri * x_imag_centred
    # x_imag_normed = Wri * x_real_centred + Wii * x_imag_centred

    broadcast_Wrr = K.reshape(Wrr, variances_broadcast)
    broadcast_Wri = K.reshape(Wri, variances_broadcast)
    broadcast_Wii = K.reshape(Wii, variances_broadcast)

    cat_W_4_real = K.concatenate([broadcast_Wrr, broadcast_Wii], axis=axis)
    cat_W_4_imag = K.concatenate([broadcast_Wri, broadcast_Wri], axis=axis)

    if (axis == 1 and ndim != 3) or ndim == 2:
        centred_real = input_centred[:, :input_dim]
        centred_imag = input_centred[:, input_dim:]
    elif ndim == 3:
        centred_real = input_centred[:, :, :input_dim]
        centred_imag = input_centred[:, :, input_dim:]
    elif axis == -1 and ndim == 4:
        centred_real = input_centred[:, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, input_dim:]
    elif axis == -1 and ndim == 5:
        centred_real = input_centred[:, :, :, :, :input_dim]
        centred_imag = input_centred[:, :, :, :, input_dim:]
    else:
        raise ValueError(
            'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
            'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
    rolled_input = K.concatenate([centred_imag, centred_real], axis=axis)

    output = cat_W_4_real * input_centred + cat_W_4_imag * rolled_input

    #   Wrr * x_real_centered | Wii * x_imag_centered
    # + Wri * x_imag_centered | Wri * x_real_centered
    # -----------------------------------------------
    # = output

    return output
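
# A NumPy check of the 2x2 inverse-square-root formula derived in the comments
# above (toy covariance entries): W @ V @ W should be (numerically) the identity.
import numpy as np
Vrr, Vii, Vri = 2.0, 1.5, 0.3
s = np.sqrt(Vrr * Vii - Vri ** 2)               # sqrt of the determinant
t = np.sqrt(Vrr + Vii + 2.0 * s)
W = np.array([[Vii + s, -Vri], [-Vri, Vrr + s]]) / (s * t)
V = np.array([[Vrr, Vri], [Vri, Vii]])
print(np.round(W @ V @ W, 6))                   # ~ [[1, 0], [0, 1]]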
Example #36
    def _scharr_edges(cls, image, magnitude):
        """ Returns a tensor holding modified Scharr edge maps.

        Parameters
        ----------
        image: tensor
            Image tensor with shape [batch_size, h, w, d] and type float32. The image(s) must be
            2x2 or larger.
        magnitude: bool
            Boolean to determine if the edge magnitude or edge direction is returned

        Returns
        -------
        tensor
            Tensor holding edge maps for each channel. Returns a tensor with shape `[batch_size, h,
            w, d, 2]` where the last two dimensions hold `[[dy[0], dx[0]], [dy[1], dx[1]], ...,
            [dy[d-1], dx[d-1]]]` calculated using the Scharr filter.
        """

        # Define vertical and horizontal Scharr filters.
        # TODO PlaidML: AttributeError: 'Value' object has no attribute 'get_shape'
        static_image_shape = image.get_shape()
        image_shape = K.shape(image)

        # 5x5 modified Scharr kernel ( reshape to (5,5,1,2) )
        matrix = np.array([[[[0.00070, 0.00070]],
                            [[0.00520, 0.00370]],
                            [[0.03700, 0.00000]],
                            [[0.00520, -0.0037]],
                            [[0.00070, -0.0007]]],
                           [[[0.00370, 0.00520]],
                            [[0.11870, 0.11870]],
                            [[0.25890, 0.00000]],
                            [[0.11870, -0.1187]],
                            [[0.00370, -0.0052]]],
                           [[[0.00000, 0.03700]],
                            [[0.00000, 0.25890]],
                            [[0.00000, 0.00000]],
                            [[0.00000, -0.2589]],
                            [[0.00000, -0.0370]]],
                           [[[-0.0037, 0.00520]],
                            [[-0.1187, 0.11870]],
                            [[-0.2589, 0.00000]],
                            [[-0.1187, -0.1187]],
                            [[-0.0037, -0.0052]]],
                           [[[-0.0007, 0.00070]],
                            [[-0.0052, 0.00370]],
                            [[-0.0370, 0.00000]],
                            [[-0.0052, -0.0037]],
                            [[-0.0007, -0.0007]]]])
        num_kernels = [2]
        kernels = K.constant(matrix, dtype='float32')
        kernels = K.tile(kernels, [1, 1, image_shape[-1], 1])

        # Use depth-wise convolution to calculate edge maps per channel.
        # Output tensor has shape [batch_size, h, w, d * num_kernels].
        pad_sizes = [[0, 0], [2, 2], [2, 2], [0, 0]]
        padded = pad(image, pad_sizes, mode='REFLECT')
        output = K.depthwise_conv2d(padded, kernels)

        if not magnitude:  # direction of edges
            # Reshape to [batch_size, h, w, d, num_kernels].
            shape = K.concatenate([image_shape, num_kernels], axis=0)
            output = K.reshape(output, shape=shape)
            output.set_shape(static_image_shape.concatenate(num_kernels))
            output = tf.atan(K.squeeze(output[:, :, :, :, 0] / output[:, :, :, :, 1], axis=None))
        # magnitude of edges -- unified x & y edges don't work well with Neural Networks
        return output
Example #37
    def _interpolate(self, image, sampled_grids, output_size):

        batch_size = K.shape(image)[0]
        height = K.shape(image)[1]
        width = K.shape(image)[2]
        num_channels = K.shape(image)[3]

        x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
        y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

        x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
        y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

        x0 = K.cast(x, 'int32')
        x1 = x0 + 1
        y0 = K.cast(y, 'int32')
        y1 = y0 + 1

        max_x = int(K.int_shape(image)[2] - 1)
        max_y = int(K.int_shape(image)[1] - 1)

        x0 = K.clip(x0, 0, max_x)
        x1 = K.clip(x1, 0, max_x)
        y0 = K.clip(y0, 0, max_y)
        y1 = K.clip(y1, 0, max_y)

        pixels_batch = K.arange(0, batch_size) * (height * width)
        pixels_batch = K.expand_dims(pixels_batch, axis=-1)
        flat_output_size = output_size[0] * output_size[1]
        base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
        base = K.flatten(base)

        # base_y0 = base + (y0 * width)
        base_y0 = y0 * width
        base_y0 = base + base_y0
        # base_y1 = base + (y1 * width)
        base_y1 = y1 * width
        base_y1 = base_y1 + base

        indices_a = base_y0 + x0
        indices_b = base_y1 + x0
        indices_c = base_y0 + x1
        indices_d = base_y1 + x1

        flat_image = K.reshape(image, shape=(-1, num_channels))
        flat_image = K.cast(flat_image, dtype='float32')
        pixel_values_a = K.gather(flat_image, indices_a)
        pixel_values_b = K.gather(flat_image, indices_b)
        pixel_values_c = K.gather(flat_image, indices_c)
        pixel_values_d = K.gather(flat_image, indices_d)

        x0 = K.cast(x0, 'float32')
        x1 = K.cast(x1, 'float32')
        y0 = K.cast(y0, 'float32')
        y1 = K.cast(y1, 'float32')

        area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
        area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
        area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
        area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

        values_a = area_a * pixel_values_a
        values_b = area_b * pixel_values_b
        values_c = area_c * pixel_values_c
        values_d = area_d * pixel_values_d
        return values_a + values_b + values_c + values_d
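
# A NumPy check of the bilinear weights above (toy sample point): the four area
# weights sum to one, so the result is a convex combination of the corner pixels.
import numpy as np
x, y = 2.3, 4.7
x0, y0 = int(np.floor(x)), int(np.floor(y))
x1, y1 = x0 + 1, y0 + 1
area_a = (x1 - x) * (y1 - y)
area_b = (x1 - x) * (y - y0)
area_c = (x - x0) * (y1 - y)
area_d = (x - x0) * (y - y0)
print(area_a + area_b + area_c + area_d)  # 1.0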
Example #38
split = SplitVector()
couple = AddCouple()
concat = ConcatVector()
scale = Scale()

basic_model_1 = build_basic_model(original_dim // 2)
basic_model_2 = build_basic_model(original_dim // 2)
basic_model_3 = build_basic_model(original_dim // 2)
basic_model_4 = build_basic_model(original_dim // 2)

x_in = Input(shape=(original_dim, ))
x = x_in

# Add negative noise to the input (training phase only)
x = Lambda(
    lambda s: K.in_train_phase(s - 0.01 * K.random_uniform(K.shape(s)), s))(x)

x = shuffle1(x)
x1, x2 = split(x)
mx1 = basic_model_1(x1)
x1, x2 = couple([x1, x2, mx1])
x = concat([x1, x2])

x = shuffle2(x)
x1, x2 = split(x)
mx1 = basic_model_2(x1)
x1, x2 = couple([x1, x2, mx1])
x = concat([x1, x2])

x = shuffle3(x)
x1, x2 = split(x)
Example #39
def yolo_loss(args, anchors, n_classes, ignore_thresh=0.5):
    n_layers = len(args) // 2
    y_preds = args[:n_layers]  # [B,H,W,3*(4+1+c)], level0->level2
    y_trues = args[n_layers:]  # [B,H,W,3,4+1+c],   level0->level2

    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # big object---> P0
    input_shape = [y_trues[0]._keras_shape[i] * 32 for i in [1, 2]]
    grid_shapes = [y_trues[i]._keras_shape[1:3] for i in range(3)]

    loss = 0.
    xy_loss_, wh_loss_, ciou_loss_, conf_loss_, cls_loss_, = 0., 0., 0., 0., 0.
    m = K.shape(y_preds[0])[0]  # batch size
    mf = K.cast(m, K.dtype(y_preds[0]))  # batch size to float
    for i in range(n_layers):
        anchors_l = anchors[anchor_mask[i]]

        conf_gt = y_trues[i][..., 4:5]
        cls_gt = y_trues[i][..., 5:]

        feats = K.reshape(y_preds[i],
                          (-1, grid_shapes[i][0], grid_shapes[i][1],
                           len(anchors_l), 4 + 1 + n_classes))
        grid_coords, pred_xy, pred_wh, pred_conf, pred_cls = bbox(
            feats, anchors_l, n_classes, input_shape)  # normed
        pred_box = K.concatenate([pred_xy, pred_wh])  # normed

        xy_gt = y_trues[i][
            ..., :2] * grid_shapes[i][::-1] - grid_coords  # offset to grid
        wh_gt = K.log(y_trues[i][..., 2:4] * input_shape[::-1] /
                      anchors_l)  # offset to grid
        # A w/h of zero would give log(0) = -inf; replace those entries with 0.
        wh_gt = K.switch(conf_gt, wh_gt, K.zeros_like(wh_gt))
        box_loss_scale = 2 - y_trues[i][..., 2:3] * y_trues[i][..., 3:4]

        # box_loss: xy_loss+wh_loss / iou loss
        # xy_loss: bce, based on grid center
        xy_loss = conf_gt * box_loss_scale * K.binary_crossentropy(
            xy_gt, feats[..., 0:2], from_logits=True)
        # wh_loss: l2, based on anchor shape
        wh_loss = conf_gt * box_loss_scale * 0.5 * K.square(wh_gt -
                                                            feats[..., 2:4])
        # ciou_loss: iou
        ciou = tf.expand_dims(bbox_ciou(y_trues[i][..., :4], pred_box,
                                        grid_coords, grid_shapes[i],
                                        input_shape, anchors_l),
                              axis=-1)
        ciou_loss = conf_gt * box_loss_scale * (1 - ciou)

        # conf_loss: bce
        # ignore mask: objects on gt mask which has iou<ignore_thresh with anchors
        ignore_mask = tf.TensorArray(K.dtype(y_trues[0]),
                                     size=1,
                                     dynamic_size=True)  # dynamically sized array
        object_mask = tf.cast(conf_gt, tf.bool)

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_trues[i][b, ..., 0:4], object_mask[
                b, ...,
                0])  # flattened(h*w*a*mask) gt boxes for current sample [N,4]
            iou = box_iou(pred_box[b], true_box)  # [H,W,a,N]
            best_iou = K.max(iou, axis=-1, keepdims=True)  # [H,W,a,1]
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()  # [B,H,W,a,1]
        conf_loss = conf_gt * K.binary_crossentropy(conf_gt, feats[...,4:5], from_logits=True)+ \
                    (1-conf_gt) * ignore_mask* K.binary_crossentropy(conf_gt, feats[...,4:5], from_logits=True)

        # cls_loss: bce
        cls_loss = conf_gt * K.binary_crossentropy(
            cls_gt, feats[..., 5:], from_logits=True)

        # xy_loss_ += K.sum(xy_loss) / mf
        # wh_loss_ += K.sum(wh_loss) / mf
        ciou_loss_ += K.sum(ciou_loss) / mf
        conf_loss_ += K.sum(conf_loss) / mf
        cls_loss_ += K.sum(cls_loss) / mf

    # loss = xy_loss_ + wh_loss_ + conf_loss_ + cls_loss_
    loss = ciou_loss_ + conf_loss_ + cls_loss_

    # return loss
    return tf.stack([loss, xy_loss_, wh_loss_, conf_loss_, cls_loss_], axis=0)
Beispiel #40
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                                    from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
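# A hedged wiring sketch (not part of the loss above): keras-yolo3-style training
# code typically attaches this loss with a Lambda layer. It assumes `model_body`
# outputs the three YOLO feature maps, `y_true_inputs` is a list of three matching
# Input tensors, and `anchors` / `num_classes` are defined elsewhere.
model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors,
                               'num_classes': num_classes,
                               'ignore_thresh': 0.5})(
                                   [*model_body.output, *y_true_inputs])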
def mytransform(source, ref_tensor):
    # Upsample `source` to the spatial size of `ref_tensor`; K.resize_images
    # expects integer scale factors, so use floor division.
    target_shape = K.shape(ref_tensor)
    source_shape = K.shape(source)
    return K.resize_images(source, target_shape[1] // source_shape[1],
                           target_shape[2] // source_shape[2], "channels_last")
 def call(self, x, mask=None):
     return K.in_train_phase(
         K.relu(x, K.random_uniform(K.shape(x), self.l, self.u)),
         K.relu(x, self.average))
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs -
                                            pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
Beispiel #44
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
Beispiel #45
0
    # Reparameterization trick
    def call(self, inputs):
        z, shift, log_scale = inputs
        z = K.exp(log_scale) * z + shift
        logdet = -K.sum(K.mean(log_scale, 0))
        self.add_loss(logdet)
        return z


# Compute the mean and variance of p(Z|X)
z_shift = Dense(z_dim)(x)

z_log_scale = Dense(z_dim)(x)
# Reparameterization layer, equivalent to adding noise to the input
u = Lambda(lambda z: K.random_normal(shape=K.shape(z)))(z_shift)
z = ScaleShift()([u, z_shift, z_log_scale])

x_recon = decoder(z)
x_out = Subtract()([x_in, x_recon])
# recon_loss is the reconstruction loss, z_loss is the KL loss
recon_loss = 0.5 * K.sum(K.mean(x_out**2, 0)) + 0.5 * np.log(
    2 * np.pi) * np.prod(K.int_shape(x_out)[1:])
z_loss = 0.5 * K.sum(K.mean(z**2, 0)) - 0.5 * K.sum(K.mean(u**2, 0))
vae_loss = recon_loss + z_loss

vae = Model(x_in, x_out)
vae.add_loss(vae_loss)
vae.compile(optimizer=Adam(1e-4))
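# A minimal training sketch (assumes `x_train`, `epochs`, and `batch_size` are
# defined elsewhere): no target is passed to fit() because the reconstruction
# and KL terms were attached with add_loss.
vae.fit(x_train, shuffle=True, epochs=epochs, batch_size=batch_size)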

Beispiel #46
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
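# Hypothetical usage sketch: `model_output` is assumed to be the final conv
# feature map of shape (batch, conv_height, conv_width,
# num_anchors * (num_classes + 5)), with `anchors` and `num_classes` as in the
# docstring above.
box_xy, box_wh, box_confidence, box_class_probs = yolo_head(
    model_output, anchors, num_classes)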
def ComplexBN(input_centred,
              Vrr,
              Vii,
              Vri,
              beta,
              gamma_rr,
              gamma_ri,
              gamma_ii,
              scale=True,
              center=True,
              layernorm=False,
              axis=-1):

    ndim = K.ndim(input_centred)
    input_dim = K.shape(input_centred)[axis] // 2
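    # Layout note: the first half of `axis` is assumed to hold the real parts and
    # the second half the imaginary parts, so `input_dim` is the number of
    # complex feature maps.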
    if scale:
        gamma_broadcast_shape = [1] * ndim
        gamma_broadcast_shape[axis] = input_dim
    if center:
        broadcast_beta_shape = [1] * ndim
        broadcast_beta_shape[axis] = input_dim * 2

    if scale:
        standardized_output = complex_standardization(input_centred,
                                                      Vrr,
                                                      Vii,
                                                      Vri,
                                                      layernorm,
                                                      axis=axis)

        # Now we perform the scaling and shifting of the normalized x using
        # the scaling parameter
        #           [  gamma_rr gamma_ri  ]
        #   Gamma = [  gamma_ri gamma_ii  ]
        # and the shifting parameter
        #    Beta = [beta_real beta_imag].T
        # where:
        # x_real_BN = gamma_rr * x_real_normed + gamma_ri * x_imag_normed + beta_real
        # x_imag_BN = gamma_ri * x_real_normed + gamma_ii * x_imag_normed + beta_imag

        broadcast_gamma_rr = K.reshape(gamma_rr, gamma_broadcast_shape)
        broadcast_gamma_ri = K.reshape(gamma_ri, gamma_broadcast_shape)
        broadcast_gamma_ii = K.reshape(gamma_ii, gamma_broadcast_shape)

        cat_gamma_4_real = K.concatenate(
            [broadcast_gamma_rr, broadcast_gamma_ii], axis=axis)
        cat_gamma_4_imag = K.concatenate(
            [broadcast_gamma_ri, broadcast_gamma_ri], axis=axis)
        if (axis == 1 and ndim != 3) or ndim == 2:
            centred_real = standardized_output[:, :input_dim]
            centred_imag = standardized_output[:, input_dim:]
        elif ndim == 3:
            centred_real = standardized_output[:, :, :input_dim]
            centred_imag = standardized_output[:, :, input_dim:]
        elif axis == -1 and ndim == 4:
            centred_real = standardized_output[:, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, input_dim:]
        elif axis == -1 and ndim == 5:
            centred_real = standardized_output[:, :, :, :, :input_dim]
            centred_imag = standardized_output[:, :, :, :, input_dim:]
        else:
            raise ValueError(
                'Incorrect Batchnorm combination of axis and dimensions. axis should be either 1 or -1. '
                'axis: ' + str(axis) + '; ndim: ' + str(ndim) + '.')
        rolled_standardized_output = K.concatenate(
            [centred_imag, centred_real], axis=axis)
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output + broadcast_beta
        else:
            return cat_gamma_4_real * standardized_output + cat_gamma_4_imag * rolled_standardized_output
    else:
        if center:
            broadcast_beta = K.reshape(beta, broadcast_beta_shape)
            return input_centred + broadcast_beta
        else:
            return input_centred
Beispiel #48
0
print('Model loaded.')

# Get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# Define the loss.
loss = K.variable(0.)
for layer_name in settings['features']:
    # Add the L2 norm of the features of a layer to the loss.

    assert layer_name in layer_dict, 'Layer ' + layer_name + ' not found in model.'
    coeff = settings['features'][layer_name]
    x = layer_dict[layer_name].output
    # We avoid border artifacts by only involving non-border pixels in the loss.
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    if K.image_data_format() == 'channels_first':
        loss += coeff * K.sum(K.square(x[:, :, 2:-2, 2:-2])) / scaling
    else:
        loss += coeff * K.sum(K.square(x[:, 2:-2, 2:-2, :])) / scaling

# Compute the gradients of the dream wrt the loss.
grads = K.gradients(loss, dream)[0]
# Normalize gradients.
grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

# Set up function to retrieve the value
# of the loss and gradients given an input image.
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)
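# Usage sketch (assumes `x` is a preprocessed image array shaped like `dream`,
# e.g. (1, height, width, 3) for channels-last data): one call returns the
# scalar loss and the gradient array used for a gradient-ascent step.
loss_value, grad_values = fetch_loss_and_grads([x])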
Beispiel #49
0
 def __init__(self, model, momentum=0.9999):
     self.momentum = momentum
     self.model = model
     self.ema_weights = [K.zeros(K.int_shape(w)) for w in model.weights]  # K.zeros needs static shapes
Beispiel #50
0
    def call(self, inputs):
        z_mean, z_log_var = inputs
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.latent_dim))

        return z_mean + K.exp(z_log_var / 2) * epsilon
Beispiel #51
0
 def timedistributed_concat(packed):
     x, pic = packed
     return K.concatenate([x, K.repeat(pic,
                                       K.shape(x)[-2])],
                          axis=-1)
Beispiel #52
0
    def call(self, inputs, training=None, mask=None):
        input_shape = K.shape(inputs)

        if self.rank == 1:
            input_shape = [input_shape[i] for i in range(3)]
            batch_shape, dim, channels = input_shape

            xx_range = tf.tile(K.expand_dims(K.arange(0, dim), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=-1)

            xx_channels = K.cast(xx_range, K.floatx())
            xx_channels = xx_channels / K.cast(dim - 1, K.floatx())
            xx_channels = (xx_channels * 2) - 1.

            outputs = K.concatenate([inputs, xx_channels], axis=-1)

        if self.rank == 2:
            if self.data_format == 'channels_first':
                inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])

            input_shape = [input_shape[i] for i in range(4)]
            batch_shape, dim1, dim2, channels = input_shape

            xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
            xx_ones = K.expand_dims(xx_ones, axis=-1)

            xx_range = tf.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=1)
            xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
            xx_channels = K.expand_dims(xx_channels, axis=-1)
            xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

            yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32')
            yy_ones = K.expand_dims(yy_ones, axis=1)

            yy_range = tf.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                              K.stack([batch_shape, 1]))
            yy_range = K.expand_dims(yy_range, axis=-1)

            yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
            yy_channels = K.expand_dims(yy_channels, axis=-1)
            yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

            xx_channels = K.cast(xx_channels, K.floatx())
            xx_channels = xx_channels / K.cast(dim1 - 1, K.floatx())
            xx_channels = (xx_channels * 2) - 1.

            yy_channels = K.cast(yy_channels, K.floatx())
            yy_channels = yy_channels / K.cast(dim2 - 1, K.floatx())
            yy_channels = (yy_channels * 2) - 1.

            outputs = K.concatenate([inputs, xx_channels, yy_channels], axis=-1)

            if self.use_radius:
                rr = K.sqrt(K.square(xx_channels - 0.5) +
                            K.square(yy_channels - 0.5))
                outputs = K.concatenate([outputs, rr], axis=-1)

            if self.data_format == 'channels_first':
                outputs = K.permute_dimensions(outputs, [0, 3, 1, 2])

        if self.rank == 3:
            if self.data_format == 'channels_first':
                inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1])

            input_shape = [input_shape[i] for i in range(5)]
            batch_shape, dim1, dim2, dim3, channels = input_shape

            xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32')
            xx_ones = K.expand_dims(xx_ones, axis=-1)

            xx_range = tf.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                              K.stack([batch_shape, 1]))
            xx_range = K.expand_dims(xx_range, axis=1)

            xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
            xx_channels = K.expand_dims(xx_channels, axis=-1)
            xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

            xx_channels = K.expand_dims(xx_channels, axis=1)
            xx_channels = tf.tile(xx_channels,
                                 [1, dim1, 1, 1, 1])

            yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
            yy_ones = K.expand_dims(yy_ones, axis=1)

            yy_range = tf.tile(K.expand_dims(K.arange(0, dim3), axis=0),
                              K.stack([batch_shape, 1]))
            yy_range = K.expand_dims(yy_range, axis=-1)

            yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
            yy_channels = K.expand_dims(yy_channels, axis=-1)
            yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

            yy_channels = K.expand_dims(yy_channels, axis=1)
            yy_channels = tf.tile(yy_channels,
                                 [1, dim1, 1, 1, 1])

            zz_range = tf.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                              K.stack([batch_shape, 1]))
            zz_range = K.expand_dims(zz_range, axis=-1)
            zz_range = K.expand_dims(zz_range, axis=-1)

            zz_channels = tf.tile(zz_range,
                                 [1, 1, dim2, dim3])
            zz_channels = K.expand_dims(zz_channels, axis=-1)

            xx_channels = K.cast(xx_channels, K.floatx())
            xx_channels = xx_channels / K.cast(dim2 - 1, K.floatx())
            xx_channels = xx_channels * 2 - 1.

            yy_channels = K.cast(yy_channels, K.floatx())
            yy_channels = yy_channels / K.cast(dim3 - 1, K.floatx())
            yy_channels = yy_channels * 2 - 1.

            zz_channels = K.cast(zz_channels, K.floatx())
            zz_channels = zz_channels / K.cast(dim1 - 1, K.floatx())
            zz_channels = zz_channels * 2 - 1.

            outputs = K.concatenate([inputs, zz_channels, xx_channels, yy_channels],
                                    axis=-1)

            if self.data_format == 'channels_first':
                outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3])

        return outputs
 def sampling(args):
     z_mean, z_log_var = args
     epsilon = K.random_normal(shape=(K.shape(z_mean)[0], input_shape),
                               mean=0.,
                               stddev=1.)
     return z_mean + K.exp(z_log_var / 2) * epsilon
 def call(self, inputs, **kwargs):
     input_shape = K.shape(inputs)
     batch_size, seq_len = input_shape[0], input_shape[1]
     pos_embeddings = self.embeddings[:seq_len]
     pos_embeddings = K.expand_dims(pos_embeddings, 0)
     return inputs + pos_embeddings
Beispiel #55
0
 def eye_like(C):
     return K.eye(K.shape(C)[0])
Beispiel #56
0
    def build(self, mode, config):
        """Build Mask R-CNN architecture.
            input_shape: The shape of the input image.
            mode: Either "training" or "inference". The inputs and
                outputs of the model differ accordingly.
        """
        assert mode in ['training', 'inference']

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be divisible by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling. "
                "For example, use 256, 320, 384, 448, 512, ... etc.")

        # Inputs
        input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                               name="input_image")
        input_image_meta = KL.Input(shape=[None], name="input_image_meta")
        if mode == "training":
            # RPN GT
            input_rpn_match = KL.Input(shape=[None, 1],
                                       name="input_rpn_match",
                                       dtype=tf.int32)
            input_rpn_bbox = KL.Input(shape=[None, 4],
                                      name="input_rpn_bbox",
                                      dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)
            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = KL.Input(shape=[None],
                                          name="input_gt_class_ids",
                                          dtype=tf.int32)
            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = KL.Input(shape=[None, 4],
                                      name="input_gt_boxes",
                                      dtype=tf.float32)
            # Normalize coordinates
            h, w = K.shape(input_image)[1], K.shape(input_image)[2]
            image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32)
            gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes)
            # 3. GT Masks (zero padded)
            # [batch, height, width, MAX_GT_INSTANCES]
            if config.USE_MINI_MASK:
                input_gt_masks = KL.Input(shape=[
                    config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None
                ],
                                          name="input_gt_masks",
                                          dtype=bool)
            else:
                input_gt_masks = KL.Input(
                    shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                    name="input_gt_masks",
                    dtype=bool)

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Stage 5 is built here (stage5=True), so C5 is available for the FPN top-down path.
        _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True)
        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in tf_config
        P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
        P4 = KL.Add(name="fpn_p4add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
            KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
        ])
        P3 = KL.Add(name="fpn_p3add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
            KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
        ])
        P2 = KL.Add(name="fpn_p2add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
            KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
        ])
        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
        P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
        P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
        P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Generate Anchors
        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                      config.RPN_ANCHOR_RATIOS,
                                                      config.BACKBONE_SHAPES,
                                                      config.BACKBONE_STRIDES,
                                                      config.RPN_ANCHOR_STRIDE)

        # RPN Model
        rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
                              len(config.RPN_ANCHOR_RATIOS), 256)
        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
        # Concatenate layer outputs

        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            KL.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
            else config.POST_NMS_ROIS_INFERENCE
        rpn_rois = ProposalLayer(proposal_count=proposal_count,
                                 nms_threshold=config.RPN_NMS_THRESHOLD,
                                 name="ROI",
                                 anchors=self.anchors,
                                 config=config)([rpn_class, rpn_bbox])

        if mode == "training":
            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
            _, _, _, active_class_ids = KL.Lambda(
                lambda x: parse_image_meta_graph(x),
                mask=[None, None, None, None])(input_image_meta)

            if not config.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                                      name="input_roi",
                                      dtype=np.int32)
                # Normalize coordinates to 0-1 range.
                target_rois = KL.Lambda(lambda x: K.cast(x, tf.float32) /
                                        image_scale[:4])(input_rois)
            else:
                target_rois = rpn_rois

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox, target_mask =\
                DetectionTargetLayer(config, name="proposal_targets")([
                    target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            # TODO: clean up (use tf.identity if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x),
                                       name="rpn_class_loss")(
                                           [input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(
                lambda x: rpn_bbox_loss_graph(config, *x),
                name="rpn_bbox_loss")(
                    [input_rpn_bbox, input_rpn_match, rpn_bbox])
            class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x),
                                   name="mrcnn_class_loss")([
                                       target_class_ids, mrcnn_class_logits,
                                       active_class_ids
                                   ])
            bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x),
                                  name="mrcnn_bbox_loss")([
                                      target_bbox, target_class_ids, mrcnn_bbox
                                  ])
            mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x),
                                  name="mrcnn_mask_loss")([
                                      target_mask, target_class_ids, mrcnn_mask
                                  ])

            # Model
            inputs = [
                input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
                input_gt_class_ids, input_gt_boxes, input_gt_masks
            ]
            if not config.USE_RPN_ROIS:
                inputs.append(input_rois)
            outputs = [
                rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois,
                rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss
            ]
            model = KM.Model(inputs, outputs, name='mask_rcnn')
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
            detections = DetectionLayer(config, name="mrcnn_detection")(
                [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

            # Convert boxes to normalized coordinates
            # TODO: let DetectionLayer return normalized coordinates to avoid
            #       unnecessary conversions
            h, w = config.IMAGE_SHAPE[:2]
            detection_boxes = KL.Lambda(
                lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)

            # Create masks for detections
            mrcnn_mask = build_fpn_mask_graph(detection_boxes,
                                              mrcnn_feature_maps,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            model = KM.Model([input_image, input_image_meta], [
                detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
                rpn_class, rpn_bbox
            ],
                             name='mask_rcnn')

        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)

        return model
Beispiel #57
0
def sample_z(args):
  z_mu, z_sigma = args
  eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
  return z_mu + K.exp(z_sigma / 2) * eps
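# A minimal usage sketch (assumes `z_mu` and `z_sigma` are Keras tensors of shape
# (batch, latent_dim) produced by Dense layers): wrapping sample_z in a Lambda
# keeps the sampling step differentiable with respect to its inputs.
z = Lambda(sample_z)([z_mu, z_sigma])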
Beispiel #58
0
 def shape(self, x):
     return K.int_shape(x) if self.backend == 'tensorflow' else K.shape(x)
Beispiel #59
0
decoder = Sequential([
    Dense(intermediate_dim, input_dim=latent_dim, activation='relu'),
    Dense(original_dim, activation='sigmoid')
])

x = Input(shape=(original_dim, ))
h = Dense(intermediate_dim, activation='relu')(x)

z_mu = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var])
z_sigma = Lambda(lambda t: K.exp(.5 * t))(z_log_var)

eps = Input(tensor=K.random_normal(stddev=epsilon_std,
                                   shape=(K.shape(x)[0], latent_dim)))
z_eps = Multiply()([z_sigma, eps])
z = Add()([z_mu, z_eps])
x_pred = decoder(z)
vae = Model(inputs=[x, eps], outputs=x_pred)
vae.compile(optimizer='adam', loss=nll)
vae.fit(X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, X_test))
encoder = Model(x, z_mu)
# Plot the 2D latent space of the autoencoder
z_test = encoder.predict(X_test, batch_size=batch_size)

plt.figure(figsize=(6, 6))
Beispiel #60
0
 def sampling(args):
     mu, log_var = args
     epsilon = K.random_normal(shape=K.shape(mu), mean=0, stddev=1.0)
     return mu + K.exp(log_var / 2) * epsilon