Example #1
def getPointMaskerRefinedHead(backbone, out_side_len, in_channels):
    out_shape = (out_side_len, out_side_len, 1)  # unused in this head

    method = tf.image.ResizeMethod.BILINEAR
    x = layerUtils.depthwiseConvBlock(backbone,
                                      in_channels,
                                      64,
                                      dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 64, 16)
    x = layerUtils.Resize(14, method)(x)
    x = layerUtils.depthwiseConvBlock(x, 16, 1, final_activation='linear')
    x = layerUtils.Resize(out_side_len, method)(x)
    return x
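All of the examples below lean on two helpers from the project's layerUtils module that are not shown. The sketches here are plausible reconstructions inferred from the call sites, not the project's actual code; they assume a recent tf.keras in which DepthwiseConv2D accepts dilation_rate (the originals target TF1-era standalone Keras, where tf.image.resize_images is the equivalent of tf.image.resize):

import tensorflow as tf
from tensorflow.keras.layers import (DepthwiseConv2D, Conv2D,
                                     BatchNormalization, Activation, Layer)

def depthwiseConvBlock(x, in_channels, out_channels, down_sample=False,
                       dilation_rate=(1, 1), final_activation='relu'):
    # MobileNet-style separable block: 3x3 depthwise conv (stride 2 when
    # down-sampling), then a 1x1 pointwise conv to out_channels.
    # in_channels is unused: DepthwiseConv2D infers it from x, but the
    # argument is kept to match the call sites.
    strides = (2, 2) if down_sample else (1, 1)
    x = DepthwiseConv2D((3, 3), strides=strides, padding='same',
                        dilation_rate=dilation_rate, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(int(out_channels), (1, 1), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    return Activation(final_activation)(x)

class Resize(Layer):
    # resizes NHWC feature maps to (side_len, side_len) with the given
    # tf.image.ResizeMethod
    def __init__(self, side_len, method, **kwargs):
        super(Resize, self).__init__(**kwargs)
        self.side_len = int(side_len)
        self.method = method

    def call(self, x):
        return tf.image.resize(x, [self.side_len, self.side_len],
                               method=self.method)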
Example #2
def getPointMasker(im_side_len, mask_side_len, compile=True):
    im_shape = (im_side_len, im_side_len, 3)

    # lip only for now
    l = mask_side_len
    num_coords = 13
    img_input = Input(shape=im_shape)
    label_masks = Input(shape=(l, l, num_coords))  # unused in this variant

    z = []  # unused in this variant

    # 224x224
    x = Convolution2D(32, (3, 3),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False)(img_input)

    # 112x112
    x = layerUtils.depthwiseConvBlock(x, 32, 64, down_sample=True)
    x = layerUtils.depthwiseConvBlock(x, 64, 64)
    backbone = layerUtils.depthwiseConvBlock(x, 64, 64)

    # 56x56
    x = layerUtils.depthwiseConvBlock(x, 64, 128, down_sample=True)

    # 28x28
    x = layerUtils.depthwiseConvBlock(x, 128, 256, down_sample=True)

    # 14x14
    # dilated convolutions enlarge the effective receptive field, which helps prevent misclassification
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[4, 4])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[8, 8])

    method = tf.image.ResizeMethod.BILINEAR
    x = layerUtils.depthwiseConvBlock(x, 256, 128)
    x = layerUtils.Resize(28, method)(x)
    x = layerUtils.depthwiseConvBlock(x,
                                      128,
                                      num_coords,
                                      final_activation='linear')
    pred = x

    model = Model(inputs=[img_input], outputs=[pred])

    #optimizer = optimizers.adam(lr=6E-2)
    if compile:
        optimizer = optimizers.SGD(lr=5E-5, momentum=0.9, nesterov=True)
        model.compile(loss=[pointMaskSigmoidLoss], optimizer=optimizer)
    return model, backbone
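The loss passed to compile above, pointMaskSigmoidLoss, is defined elsewhere in the project. Since the head ends with final_activation='linear', per-pixel sigmoid cross-entropy over the raw logits is the natural reading; this is a minimal sketch, not the author's definition:

import tensorflow as tf

def pointMaskSigmoidLoss(y_true, y_pred):
    # per-pixel sigmoid cross-entropy; y_pred carries raw logits because the
    # head above ends with final_activation='linear'
    xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,
                                                   logits=y_pred)
    return tf.reduce_mean(xent)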
Example #3
    def getPointMaskerSmall(self, in_side_len, out_side_len, in_channels,
                            out_channels):
        # This model is returned uncompiled; the caller compiles it.
        im_shape = (in_side_len, in_side_len, in_channels)
        out_shape = (out_side_len, out_side_len, out_channels)  # unused
        img_input = Input(im_shape)

        method = tf.image.ResizeMethod.BILINEAR
        x = Convolution2D(16, (3, 3),
                          strides=(2, 2),
                          padding='same',
                          use_bias=False)(img_input)
        x = layerUtils.depthwiseConvBlock(x, 16, 64, down_sample=True)
        x = layerUtils.depthwiseConvBlock(x, 64, 64, dilation_rate=[2, 2])
        x = layerUtils.depthwiseConvBlock(x, 64, 64, dilation_rate=[4, 4])
        x = layerUtils.depthwiseConvBlock(x, 64, 16)
        x = layerUtils.Resize(14, method)(x)
        x = layerUtils.depthwiseConvBlock(x,
                                          16,
                                          out_channels,
                                          final_activation='linear')
        x = layerUtils.Resize(out_side_len, method)(x)
        model = Model(inputs=img_input, outputs=x)
        return model
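A hypothetical call, with illustrative sizes (not from the source) and `builder` standing in for the hosting class instance:

# refine a 56x56 single-channel crop into a 28x28 single-channel heatmap
masker = builder.getPointMaskerSmall(in_side_len=56, out_side_len=28,
                                     in_channels=1, out_channels=1)
masker.compile(loss='mse', optimizer='adam')  # the caller compiles, per the comment above
masker.summary()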
Example #4
    def cascadedPointMaskSigmoidLoss(self, y_true, y_pred):
        num_coords = 13
        method = tf.image.ResizeMethod.BILINEAR

        # split the preds up into their component parts (dammit keras!)
        base_preds = y_pred[:, :, :, :num_coords]
        base_preds = tf.stop_gradient(base_preds)
        refined_preds = y_pred[:, :, :, num_coords:]

        # get crops;
        # this duplicates code from the model architecture --
        # blame Keras's inflexible loss-function arguments
        base_preds_normalized = Activation('sigmoid')(base_preds)
        mask_means = layerUtils.MaskMean()(base_preds_normalized)
        true_means = layerUtils.MaskMean()(y_true)
        #boxes = layerUtils.BoxesFromCenters(28.0 / self.im_height)(true_means)
        #boxes = layerUtils.PerturbBboxes([0.8, 1.2], [-0.25, 0.25])(boxes)
        boxes = layerUtils.BoxesFromCenters(28.0 / self.im_height)(mask_means)

        # avoid penalizing refined mask when the initial estimate is not even close to truth
        sqrd_diffs = tf.squared_difference(mask_means, true_means)
        dists = tf.sqrt(tf.reduce_sum(sqrd_diffs, axis=-1))
        thresh = 0.30 * 28.0 / self.im_height
        loss_mask = tf.where(dists < thresh, tf.ones(tf.shape(dists)),
                             tf.zeros(tf.shape(dists)))
        loss_mask = tf.expand_dims(loss_mask, 1)
        loss_mask = tf.expand_dims(loss_mask, 1)

        label_crops = []
        for i in range(num_coords):
            # bind i as a default argument to avoid the late-binding pitfall
            box = Lambda(lambda x, i=i: x[:, i, :])(boxes)
            label_mask = Lambda(lambda x, i=i: x[:, :, :, i])(y_true)
            label_mask = tf.expand_dims(label_mask, axis=-1)
            label_crop = layerUtils.CropAndResize(28)([label_mask, box])
            label_crops.append(label_crop)

        labels = Concatenate()(label_crops)
        labels *= loss_mask
        refined_preds = layerUtils.Resize(28, method)(refined_preds)
        refined_preds *= loss_mask

        return self.pointMaskDistanceLoss(labels, refined_preds)
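The gating above hinges on layerUtils.MaskMean, which is not in the excerpt. One plausible implementation, consistent with the normalized pixel-distance threshold computed from its outputs, is a per-channel soft-argmax (center of mass):

import tensorflow as tf
from tensorflow.keras.layers import Layer

class MaskMean(Layer):
    # Plausible reconstruction: per-channel center of mass (soft-argmax) of
    # a heatmap, with coordinates normalized to [0, 1].
    # Input: (batch, H, W, channels) -> output: (batch, channels, 2).
    def call(self, masks):
        m = tf.transpose(masks, [0, 3, 1, 2])            # (B, C, H, W)
        total = tf.reduce_sum(m, axis=[2, 3]) + 1e-8     # (B, C)
        ys = tf.linspace(0.0, 1.0, tf.shape(m)[2])       # (H,)
        xs = tf.linspace(0.0, 1.0, tf.shape(m)[3])       # (W,)
        mean_y = tf.reduce_sum(m * ys[None, None, :, None],
                               axis=[2, 3]) / total
        mean_x = tf.reduce_sum(m * xs[None, None, None, :],
                               axis=[2, 3]) / total
        return tf.stack([mean_x, mean_y], axis=-1)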
Example #5
    def pointMaskDistanceLossPresetDims(self, labels, preds):
        method = tf.image.ResizeMethod.BILINEAR
        labels = layerUtils.Resize(28, method)(labels)
        preds = layerUtils.Resize(28, method)(preds)
        return self.pointMaskDistanceLoss(labels, preds)
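pointMaskDistanceLoss itself is also outside the excerpt. Purely as an illustration of what a "distance" loss over point masks could look like (not the project's definition), one could penalize the gap between label and predicted heatmap centers of mass:

def pointMaskDistanceLoss(self, labels, preds):
    # Illustrative stand-in only: mean Euclidean distance between label and
    # predicted heatmap centers of mass, using the MaskMean sketch from
    # Example #4.
    mean_true = layerUtils.MaskMean()(labels)
    mean_pred = layerUtils.MaskMean()(Activation('sigmoid')(preds))
    sq = tf.reduce_sum(tf.square(mean_true - mean_pred), axis=-1)
    return tf.reduce_mean(tf.sqrt(sq + 1e-12))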
Example #6
    def getLipMaskerZoomed(self, alpha=1):

        # for bbox regressor
        alpha_1 = alpha

        # for mask cnn
        alpha_2 = 1.0
        input_tensor = None
        shallow = False
        input_shape = (self.im_height, self.im_width, 3)

        # https://github.com/rcmalli/keras-mobilenet/blob/master/keras_mobilenet/mobilenet.py
        input_shape = _obtain_input_shape(input_shape,
                                          default_size=224,
                                          min_size=96,
                                          data_format=K.image_data_format(),
                                          require_flatten=True)

        if input_tensor is None:
            img_input = Input(shape=input_shape)
        else:
            if not K.is_keras_tensor(input_tensor):
                img_input = Input(tensor=input_tensor, shape=input_shape)
            else:
                img_input = input_tensor

        # labels to be set as inputs as well
        mask_gts = Input(shape=(self.im_height, self.im_width, 1))
        """
        # Mask head: 
        # https://arxiv.org/pdf/1703.06870.pdf
        #a = layerUtils.CropAndResize(7)([x, b])
        #a = Convolution2D(int(512 * alpha), (3, 3), strides=(2, 2), padding='same', use_bias=False)(x)
        #a = layerUtils.depthwiseConvBlock(x, 512 * alpha, 512 * alpha)
        """

        # note to self: alternative to sharing features -- just use a new fully-convolutional architecture
        a = layerUtils.Resize(112, tf.image.ResizeMethod.BILINEAR)(img_input)
        a = Convolution2D(int(32 * alpha_2), (3, 3),
                          strides=(2, 2),
                          padding='same',
                          use_bias=False)(a)
        a = BatchNormalization()(a)
        a = Activation('relu')(a)
        a = layerUtils.depthwiseConvBlock(a, 32 * alpha_2, 64 * alpha_2)
        a = layerUtils.depthwiseConvBlock(a,
                                          64 * alpha_2,
                                          128 * alpha_2,
                                          down_sample=True)
        a = layerUtils.depthwiseConvBlock(a, 128 * alpha_2, 128 * alpha_2)
        a = layerUtils.depthwiseConvBlock(a,
                                          128 * alpha_2,
                                          256 * alpha_2,
                                          down_sample=True)
        a = layerUtils.depthwiseConvBlock(a, 256 * alpha_2, 256 * alpha_2)
        a = layerUtils.depthwiseConvBlock(a,
                                          256 * alpha_2,
                                          512 * alpha_2,
                                          down_sample=True)
        if not shallow:
            for _ in range(5):
                a = layerUtils.depthwiseConvBlock(a, 512 * alpha_2,
                                                  512 * alpha_2)

        # 7x7
        conv_transpose_depth = 128
        a = Conv2DTranspose(int(conv_transpose_depth * alpha_2),
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            data_format='channels_last')(a)
        for i in range(3):
            a = layerUtils.depthwiseConvBlock(a,
                                              conv_transpose_depth * alpha_2,
                                              conv_transpose_depth * alpha_2)

        # 14x14
        a = Conv2DTranspose(int(conv_transpose_depth * alpha_2),
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            data_format='channels_last')(a)
        for i in range(3):
            a = layerUtils.depthwiseConvBlock(a,
                                              conv_transpose_depth * alpha_2,
                                              conv_transpose_depth * alpha_2)

        # 28x28
        a = Conv2DTranspose(int(conv_transpose_depth * alpha_2),
                            kernel_size=(3, 3),
                            strides=(2, 2),
                            activation='relu',
                            padding='same',
                            data_format='channels_last')(a)
        a = layerUtils.depthwiseConvBlock(a, conv_transpose_depth * alpha_2, 1)

        #a = Lambda(lambda a: K.squeeze(a, axis=-1))(a)
        if input_tensor is not None:
            inputs = get_source_inputs(input_tensor)
        else:
            inputs = img_input

        # a holds the unnormalized mask logits
        masks = Activation('sigmoid', name='masks')(a)
        #bboxes = Lambda(lambda b: b, name='bboxes')(b)

        #mask_loss = layerUtils.MaskSigmoidLossLayer(self.mask_side_len, name='mask_obj')([mask_gts, a, bboxes])

        # try to generate ground truth masks (which were obtained from ground truth crops)
        #mask_loss = layerUtils.MaskSigmoidLossLayer(self.mask_side_len, name='mask_obj')([mask_gts, a, bboxes])
        mask_loss = layerUtils.MaskSigmoidLossLayerNoCrop(
            self.mask_side_len, name='mask_obj')([mask_gts, a])
        #mask_gts_cropped = layerUtils.CropAndResize(self.mask_side_len)([mask_gts, bbox_gts])
        #mask_gts_cropped = Lambda(lambda a: K.squeeze(a, axis=-1))(mask_gts_cropped)
        #bbox_loss = layerUtils.SquaredDistanceLossLayer(name='bbox_obj')([bbox_gts, bboxes])
        #total_loss = Lambda(lambda(l1, l2) : l1 + l2)([mask_loss, bbox_loss])

        #model = Model(inputs=[inputs, bbox_gts, mask_gts], outputs=[mask_loss, bbox_loss, bboxes, mask_gts_cropped])
        model = Model(inputs=[inputs, mask_gts], outputs=[mask_loss, masks])
        optimizer = optimizers.adam(lr=4E-4)
        model.compile(loss=[self.identityLoss, None], optimizer=optimizer)
        #model = Model(inputs=[inputs, bbox_gts, mask_gts], outputs=[mask_loss])
        #model.compile(loss=[self.identityLoss], optimizer='adam')
        #model.summary()
        return model
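This model uses the Keras loss-as-a-layer pattern: MaskSigmoidLossLayerNoCrop emits the loss tensor as a model output, and identityLoss simply passes it through at compile time. A minimal sketch of both, assuming the layer resizes its inputs to mask_side_len:

import tensorflow as tf
from tensorflow.keras.layers import Layer

class MaskSigmoidLossLayerNoCrop(Layer):
    # Sketch: per-image sigmoid cross-entropy between ground-truth masks and
    # predicted logits, both resized to (side_len, side_len), emitted as a
    # tensor so it can be routed through the model's outputs.
    def __init__(self, side_len, **kwargs):
        super(MaskSigmoidLossLayerNoCrop, self).__init__(**kwargs)
        self.side_len = int(side_len)

    def call(self, inputs):
        gt, logits = inputs
        gt = tf.image.resize(gt, [self.side_len, self.side_len])
        logits = tf.image.resize(logits, [self.side_len, self.side_len])
        xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=gt,
                                                       logits=logits)
        return tf.reduce_mean(xent, axis=[1, 2, 3])

# method on the same class as getLipMaskerZoomed
def identityLoss(self, y_true, y_pred):
    # the graph already produced the loss; just average what it emitted
    return tf.reduce_mean(y_pred)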
Example #7
    def getPointMasker(self):
        im_shape = (self.im_width, self.im_height, 3)
        masks_shape = (self.mask_side_len, self.mask_side_len, self.num_coords)
        summed_masks_shape = (self.mask_side_len, self.mask_side_len, 1)
        img_input = Input(shape=im_shape)
        label_masks = Input(shape=masks_shape)
        label_summed_masks = Input(shape=summed_masks_shape)

        x = Convolution2D(32, (3, 3),
                          strides=(1, 1),
                          padding='same',
                          use_bias=False)(img_input)

        num_features = [64, 128, 256, 512, 512]
        z_layers = [None] * 4
        x, z_layers[0] = layerUtils.rcfBlock(x,
                                             32,
                                             num_features[0],
                                             2,
                                             z_out_layers=2)
        x, z_layers[1] = layerUtils.rcfBlock(x,
                                             num_features[0],
                                             num_features[1],
                                             2,
                                             z_out_layers=4)
        x, z_layers[2] = layerUtils.rcfBlock(x,
                                             num_features[1],
                                             num_features[2],
                                             3,
                                             z_out_layers=1)
        x, z_layers[3] = layerUtils.rcfBlock(x,
                                             num_features[2],
                                             num_features[3],
                                             3,
                                             z_out_layers=1)
        #x, z_layers[4] = layerUtils.rcfBlock(x, num_features[3], num_features[4], 3, z_out_layers=1)

        # want 112x112 feature maps
        z_layers[0] = layerUtils.depthwiseConvBlock(z_layers[0],
                                                    2,
                                                    4,
                                                    down_sample=True)

        # upsample
        z_layers[2] = Conv2DTranspose(1,
                                      kernel_size=(3, 3),
                                      strides=(2, 2),
                                      activation='linear',
                                      padding='same')(z_layers[2])
        z_layers[2] = Convolution2D(1, (1, 1))(z_layers[2])

        z_layers[3] = Conv2DTranspose(1,
                                      kernel_size=(3, 3),
                                      strides=(4, 4),
                                      activation='linear',
                                      padding='same')(z_layers[3])
        z_layers[3] = Convolution2D(1, (1, 1))(z_layers[3])
        """
        # long strides xD
        z_layers[4] = Conv2DTranspose(
            1, kernel_size=(3, 3),
            strides=(8, 8),
            activation='linear',
            padding='same')(z_layers[4])"""

        final = Concatenate()(z_layers)
        final = layerUtils.depthwiseConvBlock(final, 10, 32, down_sample=True)
        final = layerUtils.depthwiseConvBlock(final, 32, self.num_coords)

        # losses
        losses = 3 * [None]

        # final prediction is 56x56
        label_masks_downsampled = layerUtils.Resize(
            self.mask_side_len // 2, tf.image.ResizeMethod.AREA)(label_masks)
        losses[0] = layerUtils.MaskSigmoidLossLayerNoCrop(
            self.mask_side_len)([label_summed_masks, z_layers[2]])
        losses[1] = layerUtils.MaskSigmoidLossLayerNoCrop(
            self.mask_side_len)([label_summed_masks, z_layers[3]])
        losses[2] = layerUtils.PointMaskSoftmaxLossLayer(
            self.mask_side_len // 2)([label_masks_downsampled, final])

        # names
        losses[0] = Lambda(lambda x: x, name='z2')(losses[0])
        losses[1] = Lambda(lambda x: x, name='z3')(losses[1])
        losses[2] = Lambda(lambda x: x, name='final')(losses[2])

        model = Model(inputs=[img_input, label_masks, label_summed_masks],
                      outputs=[
                          losses[0], losses[1], losses[2], z_layers[2],
                          z_layers[3], final
                      ])
        optimizer = optimizers.adam(lr=3E-3)
        model.compile(loss=[
            self.identityLoss, self.identityLoss, self.identityLoss, None,
            None, None
        ],
                      optimizer=optimizer)
        return model
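layerUtils.rcfBlock appears modeled on RCF (Richer Convolutional Features), where each conv stage is tapped into a thin side output. A speculative reconstruction, inferred only from the call sites: a down-sampled trunk plus a side output z with z_out_layers channels, emitted before pooling so the spatial sizes line up with the comments above (z_layers[0] at 224, z_layers[1] at 112, and so on):

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Add, MaxPooling2D

def rcfBlock(x, in_channels, out_channels, num_convs, z_out_layers=1):
    # Speculative reconstruction: num_convs separable convs, each tapped by a
    # 1x1 conv; the taps are summed into the side output z (at the incoming
    # resolution), then the trunk is down-sampled 2x. Uses the
    # depthwiseConvBlock sketch from Example #1.
    taps = []
    for i in range(num_convs):
        ch_in = in_channels if i == 0 else out_channels
        x = depthwiseConvBlock(x, ch_in, out_channels)
        taps.append(Conv2D(z_out_layers, (1, 1), padding='same')(x))
    z = Add()(taps) if len(taps) > 1 else taps[0]
    x = MaxPooling2D((2, 2), padding='same')(x)
    return x, z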
Example #8
    def getPointMaskerConcat(self, compile=True):
        im_shape = (self.im_width, self.im_height, 3)

        # lip only for now
        l = self.mask_side_len
        num_coords = 13
        img_input = Input(shape=im_shape)
        label_masks = Input(shape=(l, l, num_coords))  # unused in this variant

        z = []  # only referenced in the commented-out experiments below

        # 224x224
        x = Convolution2D(32, (3, 3),
                          strides=(2, 2),
                          padding='same',
                          use_bias=False)(img_input)

        # 112x112
        x = layerUtils.depthwiseConvBlock(x, 32, 64, down_sample=True)
        x = layerUtils.depthwiseConvBlock(x, 64, 64)
        backbone = layerUtils.depthwiseConvBlock(x, 64, 64)

        # 56x56
        x = layerUtils.depthwiseConvBlock(x, 64, 128, down_sample=True)

        # 28x28
        #z.append(layerUtils.depthwiseConvBlock(b, 128, 128))
        #z.append(layerUtils.depthwiseConvBlock(x, 128, 128))
        x = layerUtils.depthwiseConvBlock(x, 128, 256, down_sample=True)

        # 14x14
        # dilated convolutions enlarge the effective receptive field, which helps prevent misclassification
        x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[2, 2])
        x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[4, 4])
        x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[8, 8])

        #z.append(x)

        method = tf.image.ResizeMethod.BILINEAR
        x = layerUtils.depthwiseConvBlock(x, 256, 128)
        x = layerUtils.Resize(28, method)(x)

        #z[0] = layerUtils.depthwiseConvBlock(z[0], 128, 64)
        #x = Concatenate()([x, z[0]])

        #x = layerUtils.depthwiseConvBlock(x, 192, 128)
        x = layerUtils.depthwiseConvBlock(x,
                                          128,
                                          num_coords,
                                          final_activation='linear')
        #x = layerUtils.depthwiseConvBlock(x, 192, num_coords, final_activation='linear')
        #x = layerUtils.depthwiseConvBlock(x, 32, num_coords, final_activation='linear')

        #loss = layerUtils.PointMaskSoftmaxLossLayer(l)([label_masks, x])
        #loss = layerUtils.MaskSigmoidLossLayerNoCrop(l)([label_masks, x])
        #x = Activation('sigmoid')(x)
        pred = x
        #loss = Lambda(lambda x: x, name='f0')(loss)

        model = Model(inputs=[img_input], outputs=[pred])

        #optimizer = optimizers.adam(lr=6E-2)
        if compile:
            optimizer = optimizers.SGD(lr=5E-5, momentum=0.9, nesterov=True)
            model.compile(loss=[self.pointMaskSigmoidLoss],
                          metrics=[self.pointMaskDistance],
                          optimizer=optimizer)
        return model, backbone
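The pointMaskDistance metric passed to compile is likewise external. A plausible sketch, not the project's definition: the mean Euclidean distance, in mask pixels, between the per-channel argmax locations of the true and predicted heatmaps:

import tensorflow as tf

def pointMaskDistance(self, y_true, y_pred):
    # mean distance between argmax locations of true vs. predicted heatmaps
    def argmax_yx(t):
        s = tf.shape(t)
        # flatten each (H, W) channel so argmax gives a single linear index
        flat = tf.reshape(tf.transpose(t, [0, 3, 1, 2]), [s[0], s[3], -1])
        idx = tf.argmax(flat, axis=-1, output_type=tf.int32)
        return tf.cast(tf.stack([idx // s[2], idx % s[2]], axis=-1),
                       tf.float32)
    diff = argmax_yx(y_true) - argmax_yx(y_pred)
    return tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1)))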