Code Example #1
File: model.py  Project: lcalem/partial-labels
    def build(self):
        '''
        Build the Mask R-CNN architecture.
        Uses self.mode, either "training" or "inference"; the inputs and
        outputs of the model differ accordingly.
        '''
        # Inputs
        input_image = KL.Input(shape=[None, None, self.cfg.IMAGE.NB_CHANNELS],
                               name="input_image")
        input_image_meta = KL.Input(shape=[self.cfg.IMAGE_META_SIZE],
                                    name="input_image_meta")

        if self.mode == 'training':
            # RPN GT
            input_rpn_match = KL.Input(shape=[None, 1],
                                       name="input_rpn_match",
                                       dtype=tf.int32)
            input_rpn_bbox = KL.Input(shape=[None, 4],
                                      name="input_rpn_bbox",
                                      dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)

            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = KL.Input(shape=[None],
                                          name="input_gt_class_ids",
                                          dtype=tf.int32)

            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = KL.Input(shape=[None, 4],
                                      name="input_gt_boxes",
                                      dtype=tf.float32)
            # Normalize coordinates
            gt_boxes = KL.Lambda(lambda x: gutils.norm_boxes_graph(
                x,
                K.shape(input_image)[1:3]))(input_gt_boxes)

        elif self.mode == 'inference':
            # Anchors in normalized coordinates
            input_anchors = KL.Input(shape=[None, 4], name="input_anchors")

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Stage 5 (C5) is created here (stage5=True) and feeds the FPN top-down pathway.
        if callable(self.cfg.ARCHI.BACKBONE):
            _, C2, C3, C4, C5 = self.cfg.ARCHI.BACKBONE(
                input_image, stage5=True, train_bn=self.cfg.ARCHI.TRAIN_BN)
        else:
            _, C2, C3, C4, C5 = resnet.resnet_graph(
                input_image,
                self.cfg.ARCHI.BACKBONE,
                stage5=True,
                train_bn=self.cfg.ARCHI.TRAIN_BN)

        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in config
        P5 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                       name='fpn_c5p5')(C5)
        P4 = KL.Add(name="fpn_p4add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
            KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                      name='fpn_c4p4')(C4)
        ])
        P3 = KL.Add(name="fpn_p3add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
            KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                      name='fpn_c3p3')(C3)
        ])
        P2 = KL.Add(name="fpn_p2add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
            KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (1, 1),
                      name='fpn_c2p2')(C2)
        ])
        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                       padding="SAME",
                       name="fpn_p2")(P2)
        P3 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                       padding="SAME",
                       name="fpn_p3")(P3)
        P4 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                       padding="SAME",
                       name="fpn_p4")(P4)
        P5 = KL.Conv2D(self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE, (3, 3),
                       padding="SAME",
                       name="fpn_p5")(P5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Anchors
        if self.mode == 'training':
            anchors = self.get_anchors(self.img_shape)
            # Duplicate across the batch dimension because Keras requires it
            # TODO: can this be optimized to avoid duplicating the anchors?
            anchors = np.broadcast_to(anchors,
                                      (self.cfg.BATCH_SIZE, ) + anchors.shape)
            # A hack to get around Keras's bad support for constants
            anchors = KL.Lambda(lambda x: tf.Variable(anchors),
                                name="anchors")(input_image)
        else:
            anchors = input_anchors

        # RPN Model
        rpn = rpnlib.build_rpn_model(self.cfg.ARCHI.RPN_ANCHOR_STRIDE,
                                     len(self.cfg.ARCHI.RPN_ANCHOR_RATIOS),
                                     self.cfg.ARCHI.TOP_DOWN_PYRAMID_SIZE)

        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))

        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            KL.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates and zero padded.
        proposal_count = self.cfg.ARCHI.POST_NMS_ROIS_TRAINING if self.mode == 'training' else self.cfg.ARCHI.POST_NMS_ROIS_INFERENCE
        rpn_rois = proposal.ProposalLayer(
            proposal_count=proposal_count,
            nms_threshold=self.cfg.ARCHI.RPN_NMS_THRESHOLD,
            name="ROI",
            config=self.cfg)([rpn_class, rpn_bbox, anchors])

        if self.mode == 'training':
            # Class ID mask to mark class IDs supported by the dataset the image came from.
            active_class_ids = KL.Lambda(lambda x: meta.parse_image_meta_graph(
                x)["active_class_ids"])(input_image_meta)

            if not self.cfg.ARCHI.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = KL.Input(
                    shape=[self.cfg.ARCHI.POST_NMS_ROIS_TRAINING, 4],
                    name='input_roi',
                    dtype=np.int32)

                # Normalize coordinates
                target_rois = KL.Lambda(lambda x: gutils.norm_boxes_graph(
                    x,
                    K.shape(input_image)[1:3]))(input_rois)

            else:
                target_rois = rpn_rois

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox = detection_target.DetectionTargetLayer(
                self.cfg, name='proposal_targets')(
                    [target_rois, input_gt_class_ids, gt_boxes])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpnlib.fpn_classifier_graph(
                rois,
                mrcnn_feature_maps,
                input_image_meta,
                self.cfg.ARCHI.POOL_SIZE,
                self.cfg.DATASET.NB_CLASSES,
                train_bn=self.cfg.ARCHI.TRAIN_BN,
                fc_layers_size=self.cfg.ARCHI.FPN_CLASSIF_FC_LAYERS_SIZE)

            # TODO: clean up (use tf.identity if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = KL.Lambda(lambda x: l.rpn_class_loss_graph(*x),
                                       name="rpn_class_loss")(
                                           [input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(
                lambda x: l.rpn_bbox_loss_graph(self.cfg, *x),
                name="rpn_bbox_loss")(
                    [input_rpn_bbox, input_rpn_match, rpn_bbox])
            class_loss = KL.Lambda(lambda x: l.mrcnn_class_loss_graph(*x),
                                   name="mrcnn_class_loss")([
                                       target_class_ids, mrcnn_class_logits,
                                       active_class_ids
                                   ])
            bbox_loss = KL.Lambda(lambda x: l.mrcnn_bbox_loss_graph(*x),
                                  name="mrcnn_bbox_loss")([
                                      target_bbox, target_class_ids, mrcnn_bbox
                                  ])

            # Model
            inputs = [
                input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
                input_gt_class_ids, input_gt_boxes
            ]
            if not self.cfg.ARCHI.USE_RPN_ROIS:
                inputs.append(input_rois)

            outputs = [
                rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                mrcnn_class, mrcnn_bbox, rpn_rois, output_rois, rpn_class_loss,
                rpn_bbox_loss, class_loss, bbox_loss
            ]

            model = KM.Model(inputs, outputs, name='mask_rcnn')

        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpnlib.fpn_classifier_graph(
                rpn_rois,
                mrcnn_feature_maps,
                input_image_meta,
                self.cfg.ARCHI.POOL_SIZE,
                self.cfg.ARCHI.NB_CLASSES,
                train_bn=self.cfg.ARCHI.TRAIN_BN,
                fc_layers_size=self.cfg.ARCHI.FPN_CLASSIF_FC_LAYERS_SIZE)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
            # normalized coordinates
            detections = detection.DetectionLayer(self.cfg,
                                                  name="mrcnn_detection")([
                                                      rpn_rois, mrcnn_class,
                                                      mrcnn_bbox,
                                                      input_image_meta
                                                  ])

            model = KM.Model([input_image, input_image_meta, input_anchors], [
                detections, mrcnn_class, mrcnn_bbox, rpn_rois, rpn_class,
                rpn_bbox
            ],
                             name='mask_rcnn')

        # Add multi-GPU support.
        if self.cfg.GPU_COUNT > 1:
            from mrcnn.parallel_model import ParallelModel
            model = ParallelModel(model, self.cfg.GPU_COUNT)

        return model
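The build() method returns a plain Keras Model, so it can be inspected and trained with the usual Keras calls. The sketch below is a hedged illustration only: MaskRCNNWrapper and my_config are hypothetical stand-ins for the class that owns build() and its configuration, and the add_loss/compile pattern is an assumption modelled on the named loss layers above (the same pattern as Code Example #4), not code taken from the project.

# Hedged usage sketch; MaskRCNNWrapper and my_config are hypothetical names.
import tensorflow as tf

wrapper = MaskRCNNWrapper(cfg=my_config, mode='training')
keras_model = wrapper.build()

# The training branch exposes its losses as named Lambda layers, so they can be
# registered with add_loss and the model compiled without external targets
# (assumed pattern, not shown in the snippet above).
for loss_name in ['rpn_class_loss', 'rpn_bbox_loss',
                  'mrcnn_class_loss', 'mrcnn_bbox_loss']:
    keras_model.add_loss(tf.reduce_mean(keras_model.get_layer(loss_name).output))
keras_model.compile(optimizer='sgd', loss=[None] * len(keras_model.outputs))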
Code Example #2
File: caption.py  Project: weiniuzhu/caption-america
def build_model(**params):
    # TODO: get all these from **params
    CNN = 'resnet'
    INCLUDE_TOP = False
    LEARNABLE_CNN_LAYERS = params['learnable_cnn_layers']
    RNN_TYPE = 'LSTM'
    RNN_SIZE = 1024
    WORDVEC_SIZE = params['wordvec_size']
    ACTIVATION = 'relu'
    USE_CGRU = params['use_cgru']
    CGRU_SIZE = params['cgru_size']
    REDUCE_MEAN = params['reduce_visual']
    max_words = params['max_words']

    if CNN == 'vgg16':
        cnn = applications.vgg16.VGG16(include_top=INCLUDE_TOP)
    elif CNN == 'resnet':
        cnn = applications.resnet50.ResNet50(include_top=INCLUDE_TOP)
        # Pop the mean pooling layer
        cnn = models.Model(inputs=cnn.inputs, outputs=cnn.layers[-2].output)

    for layer in cnn.layers[:-LEARNABLE_CNN_LAYERS]:
        layer.trainable = False

    # Context Vector input
    # the following values, normalized to [0, 1]:
    # left, top, right, bottom, (box area / image area)
    input_ctx = layers.Input(shape=(5, ))
    ctx = layers.BatchNormalization()(input_ctx)
    repeat_ctx = layers.RepeatVector(max_words)(ctx)

    # Global image features (convnet output for the whole image)
    input_img_global = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH,
                                           IMG_CHANNELS))
    image_global = cnn(input_img_global)

    # Add a residual CGRU layer
    if USE_CGRU:
        image_global = layers.Conv2D(CGRU_SIZE, (1, 1),
                                     padding='same',
                                     activation='relu')(image_global)
        res_cgru = SpatialCGRU(image_global, CGRU_SIZE)
        image_global = layers.add([image_global, res_cgru])

    if REDUCE_MEAN:
        image_global = layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(
            image_global)
        image_global = layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(
            image_global)
    else:
        image_global = layers.Conv2D(WORDVEC_SIZE // 4, (3, 3),
                                     activation='relu')(image_global)
        image_global = layers.Conv2D(WORDVEC_SIZE // 2, (3, 3),
                                     activation='relu')(image_global)
        image_global = layers.Flatten()(image_global)

    image_global = layers.Concatenate()([image_global, ctx])
    image_global = layers.Dense(1024, activation='relu')(image_global)

    image_global = layers.BatchNormalization()(image_global)
    image_global = layers.Dense(WORDVEC_SIZE // 2,
                                activation=ACTIVATION)(image_global)
    image_global = layers.BatchNormalization()(image_global)
    image_global = layers.RepeatVector(max_words)(image_global)

    # Local image features (convnet output for just the bounding box)
    input_img_local = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    image_local = cnn(input_img_local)

    if USE_CGRU:
        image_local = layers.Conv2D(CGRU_SIZE, (1, 1),
                                    padding='same',
                                    activation='relu')(image_local)
        res_cgru = SpatialCGRU(image_local, CGRU_SIZE)
        image_local = layers.add([image_local, res_cgru])

    if REDUCE_MEAN:
        image_local = layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(
            image_local)
        image_local = layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(
            image_local)
    else:
        image_local = layers.Conv2D(WORDVEC_SIZE // 4, (3, 3),
                                    activation='relu')(image_local)
        image_local = layers.Conv2D(WORDVEC_SIZE // 2, (3, 3),
                                    activation='relu')(image_local)
        image_local = layers.Flatten()(image_local)

    image_local = layers.Concatenate()([image_local, ctx])
    image_local = layers.Dense(1024, activation='relu')(image_local)

    image_local = layers.BatchNormalization()(image_local)
    image_local = layers.Dense(WORDVEC_SIZE // 2,
                               activation=ACTIVATION)(image_local)
    image_local = layers.BatchNormalization()(image_local)
    image_local = layers.RepeatVector(max_words)(image_local)

    language_model = models.Sequential()

    input_words = layers.Input(shape=(max_words, ), dtype='int32')
    language = layers.Embedding(words.VOCABULARY_SIZE,
                                WORDVEC_SIZE,
                                input_length=max_words)(input_words)

    x = layers.concatenate([image_global, image_local, repeat_ctx, language])
    if RNN_TYPE == 'LSTM':
        x = layers.LSTM(RNN_SIZE)(x)
    else:
        x = layers.GRU(RNN_SIZE)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(words.VOCABULARY_SIZE, activation='softmax')(x)

    return models.Model(
        inputs=[input_img_global, input_img_local, input_words, input_ctx],
        outputs=x)
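A hedged usage sketch follows. The keyword names come from build_model above; the concrete values, the optimizer, and the loss are illustrative assumptions (the model ends in a softmax over the vocabulary, so a categorical cross-entropy target is a natural fit).

# Hedged usage sketch: keyword names are from build_model, values are assumed.
model = build_model(learnable_cnn_layers=2,
                    wordvec_size=512,
                    use_cgru=False,
                    cgru_size=256,
                    reduce_visual=True,
                    max_words=16)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()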
Code Example #3
def labels_to_image_model(im_shape,
                          n_channels,
                          crop_shape,
                          label_list,
                          n_neutral_labels,
                          vox2ras,
                          nonlin_shape_factor=0.0625,
                          crop_channel2=None,
                          output_div_by_n=None,
                          flipping=True):

    # get shapes
    n_dims, _ = utils.get_dims(im_shape)
    crop_shape = get_shapes(crop_shape, im_shape, output_div_by_n)
    deformation_field_size = utils.get_resample_shape(im_shape,
                                                      nonlin_shape_factor,
                                                      len(im_shape))

    # create new_label_list and corresponding LUT to make sure that labels go from 0 to N-1
    new_label_list, lut = utils.rearrange_label_list(label_list)

    # define mandatory inputs
    image_input = KL.Input(shape=im_shape + [n_channels], name='image_input')
    labels_input = KL.Input(shape=im_shape + [1], name='labels_input')
    aff_in = KL.Input(shape=(n_dims + 1, n_dims + 1), name='aff_input')
    nonlin_field_in = KL.Input(shape=deformation_field_size,
                               name='nonlin_input')
    list_inputs = [image_input, labels_input, aff_in, nonlin_field_in]

    # convert labels to new_label_list
    labels = KL.Lambda(lambda x: tf.gather(
        tf.convert_to_tensor(lut, dtype='int32'), tf.cast(x, dtype='int32')))(
            labels_input)

    # deform labels
    image_input._keras_shape = tuple(image_input.get_shape().as_list())
    labels._keras_shape = tuple(labels.get_shape().as_list())
    labels = KL.Lambda(lambda x: tf.cast(x, dtype='float'))(labels)
    resize_shape = [
        max(int(im_shape[i] / 2), deformation_field_size[i])
        for i in range(len(im_shape))
    ]
    nonlin_field = nrn_layers.Resize(size=resize_shape,
                                     interp_method='linear')(nonlin_field_in)
    nonlin_field = nrn_layers.VecInt()(nonlin_field)
    nonlin_field = nrn_layers.Resize(size=im_shape,
                                     interp_method='linear')(nonlin_field)
    image = nrn_layers.SpatialTransformer(interp_method='linear')(
        [image_input, aff_in, nonlin_field])
    labels = nrn_layers.SpatialTransformer(interp_method='nearest')(
        [labels, aff_in, nonlin_field])
    labels = KL.Lambda(lambda x: tf.cast(x, dtype='int32'))(labels)

    # cropping
    if crop_shape is not None:
        image, crop_idx = l2i_sa.random_cropping(image, crop_shape, n_dims)
        labels = KL.Lambda(
            lambda x: tf.slice(x[0],
                               begin=tf.cast(x[1], dtype='int32'),
                               size=tf.convert_to_tensor(
                                   [-1] + crop_shape + [-1], dtype='int32')))(
                                       [labels, crop_idx])
    else:
        crop_shape = im_shape

    # flipping
    if flipping:
        labels, flip = l2i_sa.label_map_random_flipping(
            labels, label_list, n_neutral_labels, vox2ras, n_dims)
        ras_axes = edit_volumes.get_ras_axes(vox2ras, n_dims)
        flip_axis = [ras_axes[0] + 1]
        image = KL.Lambda(lambda y: K.switch(
            y[0],
            KL.Lambda(lambda x: K.reverse(x, axes=flip_axis))(y[1]), y[1]))(
                [flip, image])

    # convert labels back to original values
    labels = KL.Lambda(
        lambda x: tf.gather(tf.convert_to_tensor(label_list, dtype='int32'),
                            tf.cast(x, dtype='int32')),
        name='labels_out')(labels)

    # intensity augmentation
    image = KL.Lambda(lambda x: K.clip(x, 0, 300), name='clipping')(image)

    # loop over channels
    if n_channels > 1:
        split = KL.Lambda(lambda x: tf.split(x, [1] * n_channels, axis=-1))(
            image)
    else:
        split = [image]
    processed_channels = list()
    for i, channel in enumerate(split):

        # normalise and shift intensities
        channel = l2i_ia.min_max_normalisation(channel)
        channel = KL.Lambda(lambda x: K.random_uniform(
            (1, ), .85, 1.1) * x + K.random_uniform((1, ), -.3, .3))(channel)
        channel = KL.Lambda(lambda x: K.clip(x, 0, 1))(channel)
        channel = l2i_ia.gamma_augmentation(channel)

        # randomly crop sides of second channel
        if (crop_channel2 is not None) and (i == 1):
            channel = l2i_sa.restrict_tensor(channel, crop_channel2, n_dims)
        processed_channels.append(channel)

    # concatenate all channels back, and clip output (include labels to keep it when plugging to other models)
    if n_channels > 1:
        image = KL.concatenate(processed_channels)
    else:
        image = processed_channels[0]
    image = KL.Lambda(lambda x: K.clip(x[0], 0, 1),
                      name='image_out')([image, labels])

    # build model
    brain_model = Model(inputs=list_inputs, outputs=[image, labels])
    # shape of returned images
    output_shape = image.get_shape().as_list()[1:]

    return brain_model, output_shape
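A hedged usage sketch: the shapes, the label list, and the affine matrix below are illustrative assumptions, not values from the original project.

# Hedged usage sketch with assumed shapes and labels.
import numpy as np

im_shape = [160, 160, 160]
label_list = np.arange(10)                       # assumed label values 0..9
model, out_shape = labels_to_image_model(im_shape=im_shape,
                                         n_channels=1,
                                         crop_shape=[128, 128, 128],
                                         label_list=label_list,
                                         n_neutral_labels=5,
                                         vox2ras=np.eye(4))
print(out_shape)   # shape of the generated image tensor (without batch dim)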
Code Example #4
# input1 (an image tensor) and input2 (a feature vector) are defined earlier in
# the original script; this excerpt starts partway through it.
input3 = KL.Input((2, ))

#对input1做操作得到temp1
temp1 = KL.BatchNormalization(axis=1)(input1)
temp1 = KL.Conv2D(16, (3, 3), padding='same')(temp1)
temp1 = KL.Activation('relu')(temp1)
temp1 = KL.MaxPooling2D(2)(temp1)
temp1 = KL.Flatten()(temp1)
temp1 = KL.Dense(2)(temp1)
#对input2做操作得到temp2
temp2 = KL.Dense(32)(input2)
temp2 = KL.Dense(2)(temp2)

# Compute loss1 from temp1 and temp2 via a custom Lambda layer
# Compute loss2 from temp1 and input3
loss1 = KL.Lambda(lambda x: custom_loss1(*x), name='loss1')([temp1, temp2])
loss2 = KL.Lambda(lambda x: custom_loss2(*x), name='loss2')([temp1, input3])
# Put the inputs and outputs into a Model to build the network
model = Model([input1, input2, input3], [loss1, loss2])
plot_model(model, to_file='model.png', show_shapes=True)  # inspect the model structure
# Extract the outputs of the custom loss layers and register them as the model's losses
loss_layer1 = model.get_layer('loss1').output
loss_layer2 = model.get_layer('loss2').output
model.add_loss(loss_layer1)
model.add_loss(loss_layer2)

model.compile(optimizer='sgd', loss=[None, None])


# yield turns the function into a generator that loads data chunk by chunk instead of
# all at once, reducing memory usage
def data_gen(num):
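The definition of data_gen is cut off in the excerpt above. The sketch below is purely illustrative: example_data_gen, its batch shapes, and the fit_generator call are assumptions showing how a generator is typically paired with an add_loss model (targets are None because the losses are computed inside the graph).

# Hedged sketch of a generator for the add_loss model above (all shapes assumed).
import numpy as np

def example_data_gen(num, batch_size=32):
    while True:
        for _ in range(0, num, batch_size):
            x1 = np.random.rand(batch_size, 32, 32, 3)   # assumed shape of input1
            x2 = np.random.rand(batch_size, 32)          # assumed shape of input2
            x3 = np.random.rand(batch_size, 2)           # input3 has shape (2,)
            yield [x1, x2, x3], None                     # no external targets

model.fit_generator(example_data_gen(1000), steps_per_epoch=1000 // 32, epochs=5)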
Code Example #5
def get_model(self, summary=True, 
              num_capsule=32, len_ui=8, 
              len_vj=16, routing=0, init_lr=0.001,
              l2_constant=0.0, dropout_ratio=0.1, num_classes=10):

    if routing:
        use_routing = True
    else:
        use_routing = False


    input_img = layers.Input((28, 28, 1))
    input_mask = layers.Input((num_classes, len_vj))
    
    # only use experiment reconstruction
    input_permutation = layers.Input((num_classes, len_vj))
    
    conv_layer = layers.Conv2D(256, (9, 9), strides=(1, 1),
                               use_bias=True, kernel_regularizer=l2(l2_constant), activation=None)(input_img)
    conv_layer = layers.Activation('relu')(conv_layer)

    # convolutional capsule layer
    h_i = layers.Conv2D(num_capsule * len_ui,
                        kernel_size=(9, 9),
                        strides=(2, 2),
                        padding='valid',
                        use_bias=True,
                        kernel_regularizer=l2(l2_constant), activation=None)(conv_layer)
    
    h_i = layers.Reshape((K.int_shape(h_i)[1] * K.int_shape(h_i)[2] * num_capsule, len_ui))(h_i)
    h_i = layers.Activation('relu')(h_i)

    # routing algorithm
    image_caps = Routing(num_capsule=num_classes,
                         l2_constant=l2_constant,
                         dim_capsule=len_vj,
                         routing=use_routing,
                         num_routing=3)(h_i)

    output = CapsuleNorm(name='pred_output')(image_caps)
    
    # reconstruction
    
    # mask_output : [B, Num Classes, len_vj]
    mask_output = layers.Multiply()([image_caps, input_mask])
    mask_output = layers.Add()([mask_output, input_permutation])
    
    # mask_output : [B, len_vj]
    mask_output = layers.Lambda(lambda x: K.sum(x, axis=1))(mask_output)

    fc = layers.Dense(512, activation='relu', kernel_regularizer=l2(l2_constant))(mask_output)
    fc = layers.Dense(1024, activation='relu', kernel_regularizer=l2(l2_constant))(fc)
    fc = layers.Dense(784, activation='sigmoid', kernel_regularizer=l2(l2_constant), name='reconstruct')(fc)
    
    model = Model([input_img, input_mask, input_permutation], [output, fc], name='image-capsnet')

    if summary:
        model.summary()

    # compile model
    losses = {"pred_output" : margin_loss, "reconstruct": reconstruct_loss}
    loss_weights = {"pred_output": 1.0, "reconstruct" : 0.0005*784}
    metrics = {"pred_output" : 'accuracy', "reconstruct" : "mae"}
    
    model.compile(loss=losses, loss_weights=loss_weights,
                  optimizer=Adam(init_lr, beta_1=0.9, beta_2=0.999, amsgrad=True),
                  metrics=metrics)
    return model
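A hedged usage sketch for training: following the usual CapsNet reconstruction-masking convention (an assumption here, not shown in the snippet), the mask input is the one-hot label broadcast along the capsule dimension, the permutation input is zeros, and the reconstruction target is the flattened image. capsnet below is a hypothetical instance of the class that defines get_model.

# Hedged usage sketch; capsnet is a hypothetical instance, data are dummies.
import numpy as np

num_classes, len_vj = 10, 16
x_train = np.random.rand(8, 28, 28, 1)
y_train = np.eye(num_classes)[np.random.randint(0, num_classes, 8)]   # one-hot

mask = np.repeat(y_train[:, :, None], len_vj, axis=2)   # [B, num_classes, len_vj]
permutation = np.zeros_like(mask)
flat_images = x_train.reshape(len(x_train), -1)          # reconstruction target (784)

model = capsnet.get_model(summary=False)
model.fit([x_train, mask, permutation], [y_train, flat_images],
          batch_size=4, epochs=1)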
Code Example #6
# initialize keys
# keys = [tf.get_variable("Key_%d" % i, [EMBED_HIDDEN_SIZE], initializer=tf.random_normal_initializer(stddev=0.1))
#         for i in range(NUM_BLOCKS)]


def get_keys(x):
    keys = [key for key in range(vocab_size - NUM_BLOCKS, vocab_size)]
    return tf.squeeze(tf.reshape(keys, [1, -1]))


def get_keys_shape(input_shape):
    return NUM_BLOCKS,


# keys = get_keys(None)
keys = layers.Lambda(get_keys, output_shape=get_keys_shape)(encoded_sentence)
keys = embed_1(keys)
print('embedded_keys', keys)

keys = tf.split(keys, NUM_BLOCKS, axis=0)
keys = [tf.squeeze(key, axis=0) for key in keys]

# create the main Recurrent Entity Network cells
last_state = RENLayer.REN(initial_batch_size=BATCH_SIZE,
                          units=EMBED_HIDDEN_SIZE,
                          num_blocks=NUM_BLOCKS,
                          num_units_per_block=EMBED_HIDDEN_SIZE,
                          vocab_size=vocab_size,
                          keys=keys,
                          activation=activation,
                          initializer='normal')(encoded_sentence)
Code Example #7
def build_rnn2(input, caption_gt, masks, config):

    down = KL.Conv2D(512, (3, 3),
                     padding="same",
                     activation="relu",
                     name='gcap_down_imagefeature')(input)

    reshaped_conv5_3_feats = KL.Lambda(
        lambda x: tf.reshape(x, [config.BATCH_SIZE, 64, 512]))(down)
    conv_feats = reshaped_conv5_3_feats

    print("Building the RNN...")

    contexts = conv_feats
    reshaped_contexts = KL.Lambda(lambda x: tf.reshape(x, [-1, 512]))(contexts)
    temp1 = attend_1(reshaped_contexts)
    w_embedding = KL.Embedding(input_dim=5000,
                               output_dim=512,
                               name='gcap_embedding')

    # Setup the LSTM

    # Initialize the LSTM using the mean context
    # with tf.variable_scope("initialize"):
    context_mean = KL.Lambda(lambda x: tf.reduce_mean(x, axis=1))(conv_feats)
    initial_memory, initial_output = initialize(context_mean)
    initial_state = initial_memory, initial_output

    # Prepare to run
    predictions = []
    outputs = []
    current_inputs = []
    num_steps = 15
    last_output = initial_output
    last_memory = initial_memory
    last_word = KL.Lambda(lambda x: K.zeros([config.BATCH_SIZE], 'int32'))(
        input)
    last_state = last_output, last_memory
    alphas = []
    cross_entropies = []
    predictions_correct = []
    lstm = KL.LSTM(
        512,
        return_state=True,
        recurrent_activation='hard_sigmoid',
        name='gcap_lstm',
        unit_forget_bias=False)  #(last_output,initial_state = initial_state)

    # Generate the words one by one
    for idx in range(num_steps):
        # Attention mechanism
        # with tf.variable_scope("attend"):
        # alpha = attend(contexts, last_output)

        # use 2 fc layers to attend

        temp2 = attend_2(last_output)

        temp2 = KL.Lambda(lambda x: tf.reshape(
            tf.tile(tf.expand_dims(x, 1), [1, 64, 1]), [-1, 512]))(temp2)
        temp = KL.Add()([temp1, temp2])
        att_logits = attend_3(temp)
        att_logits = KL.Lambda(lambda x: tf.reshape(x, [-1, 64]))(att_logits)
        alpha = KL.Softmax()(att_logits)
        alpha1 = KL.RepeatVector(512)(alpha)
        alpha1 = KL.Permute((2, 1))(alpha1)
        context = KL.Multiply()([contexts, alpha1])
        context = KL.Lambda(lambda x: tf.reduce_sum(x, axis=1))(context)
        tiled_masks = KL.Lambda(
            lambda x: tf.tile(tf.expand_dims(x[:, idx], 1), [1, 64]))(masks)
        masked_alpha = KL.Lambda(lambda x: tf.reshape(x * tiled_masks, [-1]))(
            alpha)
        alphas.append(masked_alpha)

        word_embed = w_embedding(last_word)
        # Apply the LSTM
        # with tf.variable_scope("lstm"):

        current_input = KL.Concatenate(axis=-1)([context, word_embed])
        current_input = KL.Lambda(lambda x: tf.expand_dims(x, 1))(
            current_input)

        output, memory, cell_out = lstm(current_input,
                                        initial_state=list(last_state))  #
        state = memory, cell_out
        current_inputs.append(current_input)
        outputs.append(output)
        # Decode the expanded output of LSTM into a word
        # with tf.variable_scope("decode"):

        expanded_output = KL.Concatenate(axis=-1)(
            [output, context, word_embed])
        logits = decode(expanded_output)
        # probs = KL.Lambda(lambda x: tf.nn.softmax(logits))(logits)
        prediction = KL.Lambda(lambda x: tf.argmax(x, 1))(logits)
        predictions.append(prediction)

        # Compute the loss for this step, if necessary
        masked_cross_entropy = KL.Lambda(lambda x: caption_loss(*x))(
            [caption_gt[:, idx], logits, masks[:, idx]])
        cross_entropies.append(masked_cross_entropy)

        # ground_truth = KL.Lambda(lambda x: tf.cast(caption_gt[:, idx], tf.int64))(caption_gt)
        # prediction_correct = tf.where(
        #     tf.equal(prediction, ground_truth),
        #     tf.cast(masks[:, idx], tf.float32),
        #     tf.cast(tf.zeros_like(prediction), tf.float32))
        # predictions_correct.append(prediction_correct)

        last_output = output
        last_memory = memory
        last_state = state
        last_word = KL.Lambda(lambda x: tf.reshape(
            tf.cast(x[:, idx], tf.int32), [config.BATCH_SIZE]))(caption_gt)  #

        # tf.get_variable_scope().reuse_variables()

        # Compute the final loss, if necessary
    cross_entropies = KL.Lambda(lambda x: tf.stack(x, axis=1))(cross_entropies)
    cross_entropy_loss = KL.Lambda(
        lambda x: tf.reduce_sum(x) / tf.reduce_sum(masks))(cross_entropies)

    alphas = KL.Lambda(lambda x: tf.reshape(tf.stack(x, axis=1), [1, 64, -1]))(
        alphas)
    attentions = KL.Lambda(lambda x: tf.reduce_sum(x, axis=2))(alphas)
    diffs = KL.Lambda(lambda x: tf.ones_like(x) - x)(attentions)
    attention_loss = KL.Lambda(lambda x: 0.01 * tf.nn.l2_loss(x) / (64))(diffs)

    total_loss = KL.Lambda(lambda x: cross_entropy_loss + x,
                           name="caption_loss")(attention_loss)

    outputs = KL.Lambda(
        lambda x: tf.reshape(x, [config.BATCH_SIZE, num_steps, 512]))(outputs)
    predictions = KL.Lambda(lambda x: tf.reshape(tf.cast(
        x, tf.float32), [config.BATCH_SIZE, num_steps, 1]))(predictions)
    # outputs2 = KL.Lambda(lambda x: tf.concat([outputs,predictions],axis=0))(outputs)

    print("RNN built.")
    return outputs, predictions, total_loss
Code Example #8
def build_hani(**model_params):
    """
    :return: the network described in the Khalil-Hani et al. paper:
    --------------------------------------------------------
    Khalil-Hani, M., & Sung, L. S. (2014). A convolutional neural
    network approach for face verification. High Performance Computing
    & Simulation (HPCS), 2014 International Conference on, (3), 707–714.
    doi:10.1109/HPCSim.2014.6903759
    """
    def tanh_scaled(x):
        A = 1.7159
        B = 2 / 3
        return A * K.tanh(B * x)

    act = model_params.get('act', tanh_scaled)
    dropout = model_params.get('dropout', 0)
    batchnorm = model_params.get('batchnorm', False)
    loss = model_params.get('loss', contrastive_loss)
    learning_rate = model_params.get('learning_rate', 1e-3)
    input_shape = (IMAGES_DIM, IMAGES_DIM, 1)
    first_input = KL.Input(input_shape)
    second_input = KL.Input(input_shape)

    model = keras.Sequential()
    initialize_weights_conv = keras.initializers.RandomNormal(
        mean=0.0, stddev=0.01, seed=84)  # filters initialize
    initialize_weights_dense = keras.initializers.RandomNormal(
        mean=0.0, stddev=0.2, seed=84)  # dense initialize
    initialize_bias = keras.initializers.RandomNormal(
        mean=0.5, stddev=0.01, seed=84)  # bias initialize

    model.add(
        KL.Conv2D(5, (6, 6),
                  strides=(2, 2),
                  activation=act,
                  input_shape=input_shape,
                  kernel_initializer=initialize_weights_conv,
                  kernel_regularizer=l2(1e-2)))
    if batchnorm:
        model.add(KL.BatchNormalization())
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(14, (6, 6),
                  strides=(2, 2),
                  activation=act,
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias,
                  kernel_regularizer=l2(1e-2)))
    if batchnorm:
        model.add(KL.BatchNormalization())
    model.add(KL.MaxPool2D())

    model.add(KL.Dropout(dropout))
    model.add(
        KL.Conv2D(60, (6, 6),
                  activation=act,
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias,
                  kernel_regularizer=l2(1e-2)))
    if batchnorm:
        model.add(KL.BatchNormalization())
    model.add(KL.MaxPool2D())

    model.add(KL.Flatten())

    model.add(
        KL.Dense(40,
                 activation=act,
                 kernel_regularizer=l2(1e-4),
                 kernel_initializer=initialize_weights_dense,
                 bias_initializer=initialize_bias))
    model.add(
        KL.Dense(40,
                 activation=None,
                 kernel_regularizer=l2(1e-4),
                 kernel_initializer=initialize_weights_dense,
                 bias_initializer=initialize_bias))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(first_input)
    encoded_r = model(second_input)

    # calculate similarity
    if loss == 'binary_crossentropy':
        L1_layer = KL.Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
        L1_distance = L1_layer([encoded_l, encoded_r])
        similarity = KL.Dense(1,
                              activation='sigmoid',
                              bias_initializer=initialize_bias)(L1_distance)
    else:
        similarity = KL.Lambda(euclidean_distance)([encoded_l, encoded_r])

    # final network
    final_network = keras.Model(inputs=[first_input, second_input],
                                outputs=similarity)
    optimizer = keras.optimizers.Adam(lr=learning_rate)
    print(loss)
    final_network.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return final_network
Code Example #9
def define_vanilla_CNN_ResNet(
    input_shape=None,
    classes=10,
    block="basic",
    residual_unit="v2",
    repetitions=[2, 2, 2, 2],
    initial_filters=64,
    activation="softmax",
    include_top=True,
    input_tensor=None,
    dropout=None,
    transition_dilation_rate=(1, 1),
    initial_strides=(2, 2),
    initial_kernel_size=(7, 7),
    initial_pooling="max",
    final_pooling=None,
    top="classification",
    num_gpus=1,
):
    """Builds a custom ResNet18 architecture.
    
    Args:
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` dim ordering)
            or `(3, 224, 224)` (with `channels_first` dim ordering).
            It should have exactly 3 dimensions,
            and width and height should be no smaller than 8.
            E.g. `(224, 224, 3)` would be one valid value.
        classes: The number of outputs at final softmax layer
        block: The block function to use. This is either `'basic'` or `'bottleneck'`.
            The original paper used `basic` for layers < 50.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the input size
            is halved. 
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
            with default (2, 2), set to (1, 1) for small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final
            model layer when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options
            are 'classification' for ImageNet style problems, 'segmentation' for
            problems like the Pascal VOC dataset, and None to exclude these layers
            entirely.
    Returns:
        The keras `Model`.
    """

    input_shape, block_fn, residual_unit = init_model(input_shape, classes,
                                                      include_top, block,
                                                      residual_unit,
                                                      activation)
    img_input = layers.Input(shape=input_shape, tensor=input_tensor)

    # IoT Node
    iot = define_cnn_architecture_IoT(img_input, initial_filters,
                                      initial_kernel_size, initial_strides)
    # edge
    edge, filters = define_cnn_architecture_edge(
        iot,
        repetitions[0],
        transition_dilation_rate,
        block_fn,
        initial_filters,
        dropout,
        residual_unit,
        initial_pooling,
        initial_strides,
    )

    # fog node
    fog = layers.Lambda(lambda x: x * 1, name="node2_input")(edge)
    fog, filters = define_cnn_architecture_fog(
        fog,
        repetitions[1],
        transition_dilation_rate,
        block_fn,
        filters,
        dropout,
        residual_unit,
    )

    # cloud node
    cloud = layers.Lambda(lambda x: x * 1, name="node1_input")(fog)
    cloud = define_cnn_architecture_cloud(
        cloud,
        repetitions[2],
        repetitions[3],
        transition_dilation_rate,
        block_fn,
        filters,
        dropout,
        residual_unit,
        input_shape,
        classes,
        activation,
        include_top,
        top,
        final_pooling,
    )

    model, parallel_model = compile_keras_parallel_model(
        img_input, cloud, num_gpus)
    return model, parallel_model
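A hedged usage sketch: the small-image settings below follow the docstring's guidance for CIFAR-sized inputs; they are illustrative choices, not the project's defaults.

# Hedged usage sketch with CIFAR-style settings (illustrative values).
model, parallel_model = define_vanilla_CNN_ResNet(
    input_shape=(32, 32, 3),
    classes=10,
    initial_strides=(1, 1),
    initial_kernel_size=(3, 3),
    initial_pooling=None,
    num_gpus=1,
)
model.summary()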
Code Example #10
def build_paper_network(**model_params):
    """
    :return: the network described in the original paper:
    --------------------------------------------------------
    Koch, Gregory, Richard Zemel, and Ruslan Salakhutdinov.
    "Siamese neural networks for one-shot image recognition."
    In ICML deep learning workshop, vol. 2. 2015.
    """
    filter_size_conv1 = model_params.get('filter_size_conv1', 10)
    filter_size_conv2 = model_params.get('filter_size_conv2', 7)
    filter_size_conv3 = model_params.get('filter_size_conv3', 4)
    filter_size_conv4 = model_params.get('filter_size_conv4', 4)
    n_filters_conv1 = model_params.get('n_filters_conv1', 64)
    n_filters_conv2 = model_params.get('n_filters_conv2', 128)
    n_filters_conv3 = model_params.get('n_filters_conv3', 128)
    n_filters_conv4 = model_params.get('n_filters_conv4', 256)
    l2_conv1 = model_params.get('l2_conv1', 1e-2)
    l2_conv2 = model_params.get('l2_conv2', 1e-2)
    l2_conv3 = model_params.get('l2_conv3', 1e-2)
    l2_conv4 = model_params.get('l2_conv4', 1e-2)
    l2_dense = model_params.get('l2_dense', 1e-4)
    learning_rate = model_params.get('learning_rate', 1e-3)
    dense_size = model_params.get('dense_size', 4096)
    momentum = model_params.get('momentum', 0.5)
    decay = model_params.get('decay', 0.01)
    loss = model_params.get('loss', 'binary_crossentropy')

    input_shape = (IMAGES_DIM, IMAGES_DIM, 1)
    first_input = KL.Input(input_shape)
    second_input = KL.Input(input_shape)

    model = keras.Sequential()
    initialize_weights_conv = keras.initializers.RandomNormal(
        mean=0.0, stddev=0.01, seed=84)  # filters initialize
    initialize_weights_dense = keras.initializers.RandomNormal(
        mean=0.0, stddev=0.2, seed=84)  # dense initialize
    initialize_bias = keras.initializers.RandomNormal(
        mean=0.5, stddev=0.01, seed=84)  # bias initialize

    model.add(
        KL.Conv2D(n_filters_conv1, (filter_size_conv1, filter_size_conv1),
                  activation='relu',
                  kernel_regularizer=l2(l2_conv1),
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias,
                  input_shape=input_shape))
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(n_filters_conv2, (filter_size_conv2, filter_size_conv2),
                  activation='relu',
                  kernel_regularizer=l2(l2_conv2),
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias))
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(n_filters_conv3, (filter_size_conv3, filter_size_conv3),
                  activation='relu',
                  kernel_regularizer=l2(l2_conv3),
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias))
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(n_filters_conv4, (filter_size_conv4, filter_size_conv4),
                  activation='relu',
                  kernel_regularizer=l2(l2_conv4),
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias))

    model.add(KL.Flatten())
    model.add(
        KL.Dense(dense_size,
                 activation='sigmoid',
                 kernel_regularizer=l2(l2_dense),
                 kernel_initializer=initialize_weights_dense,
                 bias_initializer=initialize_bias))

    hidden_first = model(first_input)
    hidden_second = model(second_input)

    L1_layer = KL.Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([hidden_first, hidden_second])
    similarity = KL.Dense(1,
                          activation='sigmoid',
                          bias_initializer=initialize_bias)(L1_distance)

    final_network = keras.Model(inputs=[first_input, second_input],
                                outputs=similarity)
    optimizer = keras.optimizers.SGD(lr=learning_rate,
                                     momentum=momentum,
                                     decay=decay)
    final_network.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    return final_network
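A hedged usage sketch: IMAGES_DIM is the module-level constant used above; the pair arrays, labels, and hyperparameter values here are illustrative dummies.

# Hedged usage sketch with dummy verification pairs.
import numpy as np

net = build_paper_network(dense_size=2048, learning_rate=5e-4)
left = np.random.rand(16, IMAGES_DIM, IMAGES_DIM, 1)
right = np.random.rand(16, IMAGES_DIM, IMAGES_DIM, 1)
same_person = np.random.randint(0, 2, size=(16, 1))   # 1 = same identity
net.fit([left, right], same_person, batch_size=8, epochs=1)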
Code Example #11
def build_vggface(**model_params):
    from keras_vggface.vggface import VGG16, RESNET50, SENET50

    dense_layer_size_1 = model_params.get('dense_size_1', 1024)
    dense_layer_size_2 = model_params.get('dense_size_2', 512)
    learning_rate = model_params.get('learning_rate', 1e-3)
    momentum = model_params.get('momentum', 0.5)
    decay = model_params.get('decay', 0.01)
    pre_trained_model = model_params.get('pre_trained_model', 'vgg16')
    dropout_prob = model_params.get('dropout_prob', 0.2)
    use_second_dense_layer = model_params.get('use_second_dense_layer', False)
    loss = model_params.get('loss', 'binary_crossentropy')

    initialize_bias = keras.initializers.RandomNormal(
        mean=0.5, stddev=0.01, seed=84)  # bias initialize

    initialize_weights = keras.initializers.glorot_uniform(seed=84)

    input_shape = (224, 224, 3)
    first_input = KL.Input(input_shape)
    second_input = KL.Input(input_shape)

    # remove the classifier layers and freeze the other layers
    if pre_trained_model == 'vgg16':
        vggface = VGG16()
        for i in range(6):
            vggface.layers.pop()
    elif pre_trained_model == 'resnet50':
        vggface = RESNET50()
        vggface.layers.pop()
    elif pre_trained_model == 'senet50':
        vggface = SENET50()
        vggface.layers.pop()
    else:
        raise Exception('Unknown pre_trained_model: {}'.format(
            model_params['pre_trained_model']))

    for layer in vggface.layers:
        layer.trainable = False

    new_model = keras.Sequential()
    new_model.add(vggface)
    new_model.add(
        KL.Dense(dense_layer_size_1,
                 activation='relu',
                 kernel_initializer=initialize_weights,
                 bias_initializer=initialize_bias,
                 kernel_regularizer=l2(1e-2)))
    new_model.add(KL.BatchNormalization())
    new_model.add(KL.Dropout(dropout_prob))
    if use_second_dense_layer:
        new_model.add(
            KL.Dense(dense_layer_size_2,
                     activation='relu',
                     kernel_initializer=initialize_weights,
                     bias_initializer=initialize_bias,
                     kernel_regularizer=l2(1e-2)))
        new_model.add(KL.Dropout(dropout_prob))

    first_hidden = new_model(first_input)
    second_hidden = new_model(second_input)

    L1_layer = KL.Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([first_hidden, second_hidden])
    similarity = KL.Dense(1,
                          activation='sigmoid',
                          kernel_initializer=initialize_weights,
                          bias_initializer=initialize_bias)(L1_distance)

    final_network = keras.Model(inputs=[first_input, second_input],
                                outputs=similarity)
    optimizer = keras.optimizers.Adam(lr=learning_rate)
    final_network.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    return final_network
Code Example #12
def build_hani_best_model(**model_params):
    """
    :return: the network described in the Khalil-Hani et al. paper:
    --------------------------------------------------------
    Khalil-Hani, M., & Sung, L. S. (2014). A convolutional neural
    network approach for face verification. High Performance Computing
    & Simulation (HPCS), 2014 International Conference on, (3), 707–714.
    doi:10.1109/HPCSim.2014.6903759
    but with the optimized hyperparameters found by the hyperas execution
    """

    act = model_params.get('act', 'relu')
    dropout = model_params.get('dropout', 0)
    batchnorm = model_params.get('batchnorm', False)
    loss = model_params.get('loss', contrastive_loss)
    learning_rate = model_params.get('learning_rate', 1e-3)
    input_shape = (IMAGES_DIM, IMAGES_DIM, 1)
    first_input = KL.Input(input_shape)
    second_input = KL.Input(input_shape)

    model = keras.Sequential()
    initialize_weights_conv = keras.initializers.glorot_uniform(
        seed=84)  # filters initialize
    initialize_weights_dense = keras.initializers.glorot_uniform(
        seed=84)  # dense initialize
    initialize_bias = keras.initializers.RandomNormal(
        mean=0.5, stddev=0.01, seed=84)  # bias initialize

    model.add(
        KL.Conv2D(5, (6, 6),
                  strides=(2, 2),
                  activation=act,
                  input_shape=input_shape,
                  kernel_initializer=initialize_weights_conv,
                  kernel_regularizer=l2(0.03148394777069553)))

    model.add(KL.BatchNormalization())
    model.add(KL.Dropout(0.3065491917788273))
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(14, (6, 6),
                  strides=(2, 2),
                  activation=act,
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias,
                  kernel_regularizer=l2(0.054048669207277224)))
    #model.add(KL.BatchNormalization())
    model.add(KL.Dropout(0.4797699256757003))
    model.add(KL.MaxPool2D())

    model.add(
        KL.Conv2D(60, (6, 6),
                  activation=act,
                  kernel_initializer=initialize_weights_conv,
                  bias_initializer=initialize_bias,
                  kernel_regularizer=l2(0.06189584230948173)))

    model.add(KL.BatchNormalization())
    model.add(KL.Dropout(0.020012398358003752))
    model.add(KL.MaxPool2D())

    model.add(KL.Flatten())

    model.add(
        KL.Dense(40,
                 activation=act,
                 kernel_regularizer=l2(0.082430594544267),
                 kernel_initializer=initialize_weights_dense,
                 bias_initializer=initialize_bias))
    model.add(KL.Dropout(0.012533877486030926))
    model.add(
        KL.Dense(40,
                 activation=None,
                 kernel_regularizer=l2(0.046085917780636185),
                 kernel_initializer=initialize_weights_dense,
                 bias_initializer=initialize_bias))
    model.add(KL.Dropout(0.05086327591390307))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(first_input)
    encoded_r = model(second_input)

    # calculate similarity
    L1_distance = KL.Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))(
        [encoded_l, encoded_r])
    similarity = KL.Dense(1,
                          activation='sigmoid',
                          kernel_initializer=initialize_weights_dense,
                          bias_initializer=initialize_bias)(L1_distance)
    final_network = keras.Model(inputs=[first_input, second_input],
                                outputs=similarity)
    optimizer = keras.optimizers.SGD(lr=0.03863427079945416,
                                     momentum=0.8962431889503087,
                                     decay=0.019965108317109886)
    final_network.compile(loss='binary_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])
    return final_network
Code Example #13
    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        print('input_name:', self.inner_model.input_names)
        print('inputs:', self.inner_model.inputs)
        input_slices = {
            name: tf.split(x, self.gpu_count)
            for name, x in zip(self.inner_model.input_names,
                               self.inner_model.inputs)
        }

        print('input_slices:', input_slices)

        output_names = self.inner_model.output_names
        outputs_all = []
        for i in range(len(self.inner_model.outputs)):
            outputs_all.append([])

            print('outputs_all:', outputs_all)

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica
                    zipped_inputs = zip(self.inner_model.input_names,
                                        self.inner_model.inputs)
                    inputs = [
                        KL.Lambda(lambda s: input_slices[name][i],
                                  output_shape=lambda s:
                                  (None, ) + s[1:])(tensor)
                        for name, tensor in zipped_inputs
                    ]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # Concatenate or average outputs?
                # Outputs usually have a batch dimension and we concatenate
                # across it. If they don't, then the output is likely a loss
                # or a metric value that gets averaged across the batch.
                # Keras expects losses and metrics to be scalars.
                if K.int_shape(outputs[0]) == ():
                    # Average
                    m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs),
                                  name=name)(outputs)
                else:
                    # Concatenate
                    m = KL.Concatenate(axis=0, name=name)(outputs)
                merged.append(m)
        return merged
Code Example #14
        def build_network(self):
            s = keras_layers.Input(shape=self.nn.input_dims,
                                   dtype='float32',
                                   name='s')
            G = keras_layers.Input(shape=(1, ), dtype='float32', name='G')

            if self.nn.input_type == INPUT_TYPE_OBSERVATION_VECTOR:
                x = keras_layers.Dense(
                    self.nn.fc_layers_dims[0],
                    activation='relu',
                    kernel_initializer=keras_init.he_normal())(s)

            else:  # self.input_type == INPUT_TYPE_STACKED_FRAMES
                x = keras_layers.Conv2D(
                    filters=32,
                    kernel_size=(8, 8),
                    strides=4,
                    name='conv1',
                    kernel_initializer=keras_init.he_normal())(s)
                x = keras_layers.BatchNormalization(epsilon=1e-5,
                                                    name='conv1_bn')(x)
                x = keras_layers.Activation('relu', name='conv1_bn_ac')(x)

                x = keras_layers.Conv2D(
                    filters=64,
                    kernel_size=(4, 4),
                    strides=2,
                    name='conv2',
                    kernel_initializer=keras_init.he_normal())(x)
                x = keras_layers.BatchNormalization(epsilon=1e-5,
                                                    name='conv2_bn')(x)
                x = keras_layers.Activation('relu', name='conv2_bn_ac')(x)

                x = keras_layers.Conv2D(
                    filters=128,
                    kernel_size=(3, 3),
                    strides=1,
                    name='conv3',
                    kernel_initializer=keras_init.he_normal())(x)
                x = keras_layers.BatchNormalization(epsilon=1e-5,
                                                    name='conv3_bn')(x)
                x = keras_layers.Activation('relu', name='conv3_bn_ac')(x)

                x = keras_layers.Flatten()(x)

            x = keras_layers.Dense(
                self.nn.fc_layers_dims[-1],
                activation='relu',
                kernel_initializer=keras_init.he_normal())(x)

            if self.nn.is_discrete_action_space:
                pi = keras_layers.Dense(
                    self.nn.n_actions,
                    activation='softmax',
                    name='pi',  # a_probs = the stochastic policy (π)
                    kernel_initializer=keras_init.glorot_normal())(x)
                self.policy = keras_models.Model(inputs=s, outputs=pi)
                self.model = keras_models.Model(inputs=[s, G],
                                                outputs=pi)  # policy_model
            else:
                mu = keras_layers.Dense(
                    self.nn.n_actions,
                    name='mu',  # Mean (μ)
                    kernel_initializer=keras_init.glorot_normal())(x)
                sigma_unactivated = keras_layers.Dense(
                    self.nn.n_actions,
                    name=
                    'sigma_unactivated',  # unactivated STD (σ) - can be a negative number
                    kernel_initializer=keras_init.glorot_normal())(x)
                # Element-wise exponential: e^(sigma_unactivated):
                #   we exponentiate so that the STD (σ) is strictly positive (it must be non-zero, otherwise the distribution collapses to a Dirac delta).
                sigma = keras_layers.Lambda(
                    lambda sig: keras_backend.exp(sig),  # STD (σ)
                    name='sigma')(sigma_unactivated)

                self.policy = keras_models.Model(inputs=s, outputs=[mu, sigma])
                self.model = keras_models.Model(inputs=[s, G],
                                                outputs=[mu, sigma
                                                         ])  # policy_model

            is_discrete_action_space = self.nn.is_discrete_action_space

            def custom_loss(
                y_true, y_pred
            ):  # y_true = a_indices_one_hot; y_pred = actor output (pi, or [mu, sigma])
                if is_discrete_action_space:
                    prob_chosen_a = keras_backend.sum(
                        y_pred * y_true)  # outputs the prob of the chosen a
                    prob_chosen_a = keras_backend.clip(
                        prob_chosen_a, 1e-8, 1 -
                        1e-8)  # clip to avoid taking the log of 0 or 1
                    log_prob_chosen_a = keras_backend.log(
                        prob_chosen_a
                    )  # log_probability, negative value (since prob<1)
                    loss = -log_prob_chosen_a * G
                else:
                    mu_pred, sigma_pred = y_pred[0], y_pred[
                        1]  # Mean (μ), STD (σ)
                    gaussian_dist = tfp.distributions.Normal(loc=mu_pred,
                                                             scale=sigma_pred)
                    a_log_prob = gaussian_dist.log_prob(y_true[0])
                    loss = -keras_backend.mean(a_log_prob) * G

                return loss

            optimizer = keras_get_optimizer(self.nn.optimizer_type,
                                            self.nn.ALPHA)
            self.model.compile(optimizer, loss=custom_loss)
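For the discrete-action branch, custom_loss implements the REINFORCE-style objective -log π(a|s) · G. A small numeric check of that expression with made-up values (the array contents below are illustrative only, not from the model):

import numpy as np

a_probs = np.array([0.1, 0.7, 0.2])    # softmax policy output (y_pred)
a_one_hot = np.array([0.0, 1.0, 0.0])  # chosen action as one-hot (y_true)
G = 2.5                                # discounted return fed through the G input

prob_chosen = np.clip(np.sum(a_probs * a_one_hot), 1e-8, 1 - 1e-8)
loss = -np.log(prob_chosen) * G        # ~0.89; rarer actions or larger returns -> larger loss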
Code example #15
def fpn_classifier_graph(rois,
                         feature_maps,
                         image_meta,
                         pool_size,
                         num_classes,
                         train_bn=True,
                         fc_layers_size=1024):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.
    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers
    Returns:
        logits: [N, NUM_CLASSES] classifier logits (before softmax)
        probs: [N, NUM_CLASSES] classifier probabilities
        bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to
                     proposal boxes
    """
    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
    x = modellib.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_classifier")([rois, image_meta] + feature_maps)
    # Two 1024 FC layers (implemented with Conv2D for consistency)
    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size),
                                     padding="valid"),
                           name="mrcnn_class_conv1")(x)
    x = KL.TimeDistributed(modellib.BatchNorm(),
                           name='mrcnn_class_bn1')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
                           name="mrcnn_class_conv2")(x)
    x = KL.TimeDistributed(modellib.BatchNorm(),
                           name='mrcnn_class_bn2')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                       name="pool_squeeze")(x)

    # Classifier head
    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
                                            name='mrcnn_class_logits')(shared)
    mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
                                     name="mrcnn_class")(mrcnn_class_logits)

    # BBox head
    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
    x = KL.TimeDistributed(KL.Dense(4, activation='linear'),
                           name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    s = K.int_shape(x)
    x = KL.Reshape((s[1], 1, 4), name="mrcnn_bbox")(x)
    # Duplicate output for fg/bg detections
    mrcnn_bbox = KL.Concatenate(axis=-2)([x for i in range(num_classes)])

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
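As a rough guide to the shapes flowing through this head (a sketch assuming pool_size=7, fc_layers_size=1024, num_classes=2 and 100 ROIs per image; the exact numbers depend on the config):

# roi_align_classifier : [batch, 100, 7, 7, channels]
# mrcnn_class_conv1    : [batch, 100, 1, 1, 1024]   (the 7x7 "valid" conv acts as an FC layer)
# mrcnn_class_conv2    : [batch, 100, 1, 1, 1024]
# pool_squeeze         : [batch, 100, 1024]
# mrcnn_class_logits   : [batch, 100, 2]
# mrcnn_bbox_fc        : [batch, 100, 4]            (class-agnostic deltas)
# mrcnn_bbox           : [batch, 100, 2, 4]         (deltas duplicated once per class)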
Code example #16
def sampling(args):
    """Reparameterization trick: sample z = mean + exp(0.5 * log_var) * eps with eps ~ N(0, I)."""
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


# x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
# x = layers.Input(batch_shape=(batch_size, 32, 32, 3))
x = layers.Input(shape=(32, 32, 3))

encoded = encoder(x)

mean = layers.Dense(1024, activation=tf.nn.softplus)(encoded)
sigma = layers.Dense(1024, activation=tf.nn.relu)(encoded)

# z = mean + tf.multiply(tf.sqrt(tf.exp(sigma)),
#                        tf.random_normal(shape=(batch_size, 1024)))
z = layers.Lambda(sampling)([mean, sigma])
my_encoder = keras.models.Model(x, [mean, sigma, z])

latent_inputs = layers.Input(shape=(1024, ))

x_reco = decoder(latent_inputs)
my_decoder = keras.models.Model(latent_inputs, x_reco)

x_reco = my_decoder(my_encoder(x)[2])
my_vae = keras.models.Model(x, x_reco)

reconstruction_term = -tf.reduce_sum(
    tfp.distributions.MultivariateNormalDiag(
        layers.Reshape(
            (3072, ))(x_reco), scale_identity_multiplier=0.05).log_prob(
                layers.Reshape((3072, ))(x)))
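The sampling function above relies on the reparameterization trick: z = mean + exp(0.5 · log_var) · eps has the same distribution as a draw from N(mean, exp(log_var)) while keeping the graph differentiable w.r.t. mean and log_var. A quick numerical sanity check (standalone, not part of the model):

import numpy as np

rng = np.random.default_rng(0)
mean, log_var = 1.0, np.log(4.0)          # target distribution N(1, 4)
eps = rng.standard_normal(100000)
z = mean + np.exp(0.5 * log_var) * eps

print(z.mean())   # ~1.0
print(z.var())    # ~4.0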
Code example #17
    def build(self, mode, config):
        """Build Mask R-CNN architecture.
            input_shape: The shape of the input image.
            mode: Either "training" or "inference". The inputs and
                outputs of the model differ accordingly.
        """
        assert mode in ['training', 'inference']

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be dividable by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling."
                "For example, use 256, 320, 384, 448, 512, ... etc. ")

        # Inputs
        input_image = KL.Input(shape=config.IMAGE_SHAPE.tolist(),
                               name="input_image")
        # CHANGE: add target input
        if not config.NUM_TARGETS:
            config.NUM_TARGETS = 1
        input_target = KL.Input(shape=[config.NUM_TARGETS] +
                                config.TARGET_SHAPE.tolist(),
                                name="input_target")
        input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE],
                                    name="input_image_meta")
        if mode == "training":
            # RPN GT
            input_rpn_match = KL.Input(shape=[None, 1],
                                       name="input_rpn_match",
                                       dtype=tf.int32)
            input_rpn_bbox = KL.Input(shape=[None, 4],
                                      name="input_rpn_bbox",
                                      dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)
            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = KL.Input(shape=[None],
                                          name="input_gt_class_ids",
                                          dtype=tf.int32)
            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = KL.Input(shape=[None, 4],
                                      name="input_gt_boxes",
                                      dtype=tf.float32)
            # Normalize coordinates
            gt_boxes = KL.Lambda(lambda x: modellib.norm_boxes_graph(
                x,
                K.shape(input_image)[1:3]))(input_gt_boxes)
            # 3. GT Masks (zero padded)
            # [batch, height, width, MAX_GT_INSTANCES]
            if config.USE_MINI_MASK:
                input_gt_masks = KL.Input(shape=[
                    config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None
                ],
                                          name="input_gt_masks",
                                          dtype=bool)
            else:
                input_gt_masks = KL.Input(
                    shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                    name="input_gt_masks",
                    dtype=bool)
        elif mode == "inference":
            # Anchors in normalized coordinates
            input_anchors = KL.Input(shape=[None, 4], name="input_anchors")

        # Build the shared convolutional layers.
        # CHANGE: Use weight-shared FPN model for image and target
        # Create FPN Model
        resnet = build_resnet_model(self.config)
        fpn = build_fpn_model(feature_maps=self.config.FPN_FEATUREMAPS)
        # Create the image feature pyramid
        _, IC2, IC3, IC4, IC5 = resnet(input_image)
        IP2, IP3, IP4, IP5, IP6 = fpn([IC2, IC3, IC4, IC5])
        # Create the target feature pyramid
        input_targets = [
            KL.Lambda(lambda x: x[:, idx, ...])(input_target)
            for idx in range(input_target.shape[1])
        ]
        for k, one_target in enumerate(input_targets):
            _, TC2, TC3, TC4, TC5 = resnet(one_target)
            out = fpn([TC2, TC3, TC4, TC5])
            if k == 0:
                target_pyramid = out
            else:
                target_pyramid = [
                    KL.Add(name="target_adding_{}_{}".format(k, i))(
                        [target_pyramid[i], out[i]]) for i in range(len(out))
                ]

        TP2, TP3, TP4, TP5, TP6 = [
            KL.Lambda(lambda x: x / config.NUM_TARGETS)(target_pyramid[i])
            for i in range(len(target_pyramid))
        ]
        #        one_target = KL.Lambda(lambda x: x[:,0,...])(input_target)
        #        one_target = input_target[:,0,...]
        #         _, TC2, TC3, TC4, TC5 = resnet(one_target)
        #         TP2, TP3, TP4, TP5, TP6 = fpn([TC2, TC3, TC4, TC5])

        # CHANGE: add siamese distance computation
        # Combine FPs using L1 distance
        P2 = l1_distance_graph(IP2,
                               TP2,
                               feature_maps=3 * self.config.FPN_FEATUREMAPS //
                               2,
                               name='P2')
        P3 = l1_distance_graph(IP3,
                               TP3,
                               feature_maps=3 * self.config.FPN_FEATUREMAPS //
                               2,
                               name='P3')
        P4 = l1_distance_graph(IP4,
                               TP4,
                               feature_maps=3 * self.config.FPN_FEATUREMAPS //
                               2,
                               name='P4')
        P5 = l1_distance_graph(IP5,
                               TP5,
                               feature_maps=3 * self.config.FPN_FEATUREMAPS //
                               2,
                               name='P5')
        P6 = l1_distance_graph(IP6,
                               TP6,
                               feature_maps=3 * self.config.FPN_FEATUREMAPS //
                               2,
                               name='P6')

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Anchors
        if mode == "training":
            anchors = self.get_anchors(config.IMAGE_SHAPE)
            # Duplicate across the batch dimension because Keras requires it
            # TODO: can this be optimized to avoid duplicating the anchors?
            anchors = np.broadcast_to(anchors,
                                      (config.BATCH_SIZE, ) + anchors.shape)
            # A hack to get around Keras's bad support for constants
            anchors = KL.Lambda(lambda x: tf.Variable(anchors),
                                name="anchors")(input_image)
        else:
            anchors = input_anchors

        # RPN Model
        # CHANGE: Set number of filters to [3*self.config.FPN_FEATUREMAPS//2]
        rpn = modellib.build_rpn_model(config.RPN_ANCHOR_STRIDE,
                                       len(config.RPN_ANCHOR_RATIOS),
                                       3 * self.config.FPN_FEATUREMAPS // 2)
        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            KL.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
            else config.POST_NMS_ROIS_INFERENCE
        rpn_rois = modellib.ProposalLayer(
            proposal_count=proposal_count,
            nms_threshold=config.RPN_NMS_THRESHOLD,
            name="ROI",
            config=config)([rpn_class, rpn_bbox, anchors])

        if mode == "training":
            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
            active_class_ids = KL.Lambda(
                lambda x: modellib.parse_image_meta_graph(x)[
                    "active_class_ids"])(input_image_meta)

            if not config.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                                      name="input_roi",
                                      dtype=np.int32)
                # Normalize coordinates
                target_rois = KL.Lambda(lambda x: modellib.norm_boxes_graph(
                    x,
                    K.shape(input_image)[1:3]))(input_rois)
            else:
                target_rois = rpn_rois

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox, target_mask =\
                modellib.DetectionTargetLayer(config, name="proposal_targets")([
                    target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            # CHANGE: reduce number of classes to 2
            # CHANGE: replaced with custom 2 class function
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
                                     config.POOL_SIZE, num_classes=2,
                                     train_bn=config.TRAIN_BN,
                                     fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
            # CHANGE: reduce number of classes to 2
            # CHANGE: replaced with custom 2 class function
            if config.MODEL == 'mrcnn':
                mrcnn_mask = fpn_mask_graph(rois,
                                            mrcnn_feature_maps,
                                            input_image_meta,
                                            config.MASK_POOL_SIZE,
                                            num_classes=2,
                                            train_bn=config.TRAIN_BN)

            # TODO: clean up (use tf.identity if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = KL.Lambda(
                lambda x: modellib.rpn_class_loss_graph(*x),
                name="rpn_class_loss")([input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(
                lambda x: modellib.rpn_bbox_loss_graph(config, *x),
                name="rpn_bbox_loss")(
                    [input_rpn_bbox, input_rpn_match, rpn_bbox])
            # CHANGE: use custom class loss without using active_class_ids
            class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x),
                                   name="mrcnn_class_loss")([
                                       target_class_ids, mrcnn_class_logits,
                                       active_class_ids
                                   ])
            bbox_loss = KL.Lambda(lambda x: modellib.mrcnn_bbox_loss_graph(*x),
                                  name="mrcnn_bbox_loss")([
                                      target_bbox, target_class_ids, mrcnn_bbox
                                  ])
            if config.MODEL == 'mrcnn':
                mask_loss = KL.Lambda(
                    lambda x: modellib.mrcnn_mask_loss_graph(*x),
                    name="mrcnn_mask_loss")(
                        [target_mask, target_class_ids, mrcnn_mask])

            # Model
            # CHANGE: Added target to inputs
            inputs = [
                input_image, input_image_meta, input_target, input_rpn_match,
                input_rpn_bbox, input_gt_class_ids, input_gt_boxes,
                input_gt_masks
            ]
            if not config.USE_RPN_ROIS:
                inputs.append(input_rois)
            if config.MODEL == 'mrcnn':
                outputs = [
                    rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                    mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois,
                    rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss,
                    mask_loss
                ]
            elif config.MODEL == 'frcnn':
                outputs = [
                    rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                    mrcnn_class, mrcnn_bbox, rpn_rois, output_rois,
                    rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss
                ]
            model = KM.Model(inputs, outputs, name='mask_rcnn')
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            # CHANGE: reduce number of classes to 2
            # CHANGE: replaced with custom 2 class function
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
                                     config.POOL_SIZE, num_classes=2,
                                     train_bn=config.TRAIN_BN,
                                     fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
            # normalized coordinates
            detections = modellib.DetectionLayer(config,
                                                 name="mrcnn_detection")([
                                                     rpn_rois, mrcnn_class,
                                                     mrcnn_bbox,
                                                     input_image_meta
                                                 ])

            # Create masks for detections
            detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
            # CHANGE: reduce number of classes to 2
            # CHANGE: replaced with custom 2 class function
            if config.MODEL == 'mrcnn':
                mrcnn_mask = fpn_mask_graph(detection_boxes,
                                            mrcnn_feature_maps,
                                            input_image_meta,
                                            config.MASK_POOL_SIZE,
                                            num_classes=2,
                                            train_bn=config.TRAIN_BN)

            # CHANGE: Added target to the input
            inputs = [
                input_image, input_image_meta, input_target, input_anchors
            ]
            if config.MODEL == 'mrcnn':
                outputs = [
                    detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
                    rpn_class, rpn_bbox
                ]
            elif config.MODEL == 'frcnn':
                outputs = [
                    detections, mrcnn_class, mrcnn_bbox, rpn_rois, rpn_class,
                    rpn_bbox
                ]
            model = KM.Model(inputs, outputs, name='mask_rcnn')

        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from mrcnn.parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)

        return model
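l1_distance_graph is called above but not included in this excerpt. Purely as an illustration of a siamese L1-style merge (an assumption about its behavior, not the project's actual implementation; the default 384 channels assumes FPN_FEATUREMAPS = 256, i.e. 3 * 256 // 2), one plausible sketch:

import tensorflow as tf
import keras.layers as KL

def l1_distance_graph_sketch(image_fm, target_fm, feature_maps=384, name='P'):
    # Reduce the target pyramid level to one feature vector per channel.
    target_vec = KL.GlobalAveragePooling2D(name=name + '_target_pool')(target_fm)
    target_vec = KL.Reshape((1, 1, -1), name=name + '_target_reshape')(target_vec)
    # Broadcasted element-wise L1 distance against the image feature map.
    distance = KL.Lambda(lambda t: tf.abs(t[0] - t[1]),
                         name=name + '_l1_distance')([image_fm, target_vec])
    # Keep both the original image features and the distance, then compress.
    merged = KL.Concatenate(axis=-1, name=name + '_concat')([image_fm, distance])
    return KL.Conv2D(feature_maps, (1, 1), name=name + '_conv')(merged)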
Code example #18
def multi_gpu_model(model, gpus=None):
    """Replicates a model on different GPUs.
    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:
    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
        is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.
    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.
    This induces quasi-linear speedup on up to 8 GPUs.
    This function is only available with the TensorFlow backend
    for the time being.
    # Arguments
        model: A Keras model instance. To avoid OOM errors,
            this model could have been built on CPU, for instance
            (see usage example below).
        gpus: Integer >= 2 or list of integers, number of GPUs or
            list of GPU IDs on which to create model replicas.
    # Returns
        A Keras `Model` instance which can be used just like the initial
        `model` argument, but which distributes its workload on multiple GPUs.
    # Example
    ```python
        import tensorflow as tf
        from keras.applications import Xception
        from keras.utils import multi_gpu_model
        import numpy as np
        num_samples = 1000
        height = 224
        width = 224
        num_classes = 1000
        # Instantiate the base model (or "template" model).
        # We recommend doing this under a CPU device scope,
        # so that the model's weights are hosted on CPU memory.
        # Otherwise they may end up hosted on a GPU, which would
        # complicate weight sharing.
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes)
        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        parallel_model = multi_gpu_model(model, gpus=8)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')
        # Generate dummy data.
        x = np.random.random((num_samples, height, width, 3))
        y = np.random.random((num_samples, num_classes))
        # This `fit` call will be distributed on 8 GPUs.
        # Since the batch size is 256, each GPU will process 32 samples.
        parallel_model.fit(x, y, epochs=20, batch_size=256)
        # Save model via the template model (which shares the same weights):
        model.save('my_model.h5')
    ```
    # On model saving
    To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)`
    with the template model (the argument you passed to `multi_gpu_model`),
    rather than the model returned by `multi_gpu_model`.
    """
    """
    if K.backend() != 'tensorflow':
        raise ValueError('`multi_gpu_model` is only available '
                         'with the TensorFlow backend.')

    available_devices = _get_available_devices()
    available_devices = [_normalize_device_name(name) for name in available_devices]
    if not gpus:
        # Using all visible GPUs when not specifying `gpus`
        # e.g. CUDA_VISIBLE_DEVICES=0,2 python3 keras_mgpu.py
        gpus = len([x for x in available_devices if 'gpu' in x])
    """
    
    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `len(gpus) >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `gpus >= 2`. '
                             'Received: `gpus=%d`' % gpus)
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    import tensorflow as tf

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    
    def get_slice(data, i, parts):
        shape       = tf.shape(data)
        batch_size  = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        
        if i == num_gpus - 1:
            size = batch_size - step * i
        else:
            size = step
        
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        
        return tf.slice(data, start, size)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = KL.Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        for name, outputs in zip(model.output_names, all_outputs):
            # If outputs are numbers without dimensions, add a batch dim.
            def add_dim(tensor):
                """Add a dimension to tensors that don't have any."""
                if K.int_shape(tensor) == ():
                    return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
                return tensor
            outputs = list(map(add_dim, outputs))
            
            verbose = 0
            if verbose:
                print ('---------------->')
                for each in outputs:
                    print (each)
                    
            merged.append(KL.concatenate(outputs,
                                      axis=0, name=name))
        return KM.Model(model.inputs, merged)
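The get_slice helper above splits the batch so that each of the first num_gpus - 1 replicas receives floor(batch / num_gpus) samples and the last replica picks up the remainder. A tiny check of that arithmetic (illustrative only):

def slice_sizes(batch_size, num_gpus):
    step = batch_size // num_gpus
    return [step] * (num_gpus - 1) + [batch_size - step * (num_gpus - 1)]

print(slice_sizes(64, 2))   # [32, 32]
print(slice_sizes(10, 3))   # [3, 3, 4]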
Code example #19
    def build(self, mode, subnet, config):

        assert mode in ["training", "inference"]
        input_image = KL.Input(shape=[64, 64, 3], dtype=tf.float32)
        input_bboxes = KL.Input(shape=[None, 4], dtype=tf.float32)
        input_class_ids = KL.Input(shape=[None], dtype=tf.int32)
        input_active_ids = KL.Input(shape=[4, ], dtype=tf.int32)
        input_rpn_match = KL.Input(shape=[None, 1], dtype=tf.int32)
        input_rpn_bbox = KL.Input(shape=[None, 4], dtype=tf.float32)

        h, w = config.image_size[: 2]
        image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32)
        gt_bboxes = KL.Lambda(lambda x: x / image_scale)(input_bboxes)

        feature_map = resNet_featureExtractor(input_image)
        rpn_class, rpn_prob, rpn_bbox = rpn_net(feature_map, 9)

        anchors = utils.anchor_gen(featureMap_size=[8, 8], ratios=config.ratios, scales=config.scales, \
                                   rpn_stride=config.rpn_stride, anchor_stride=config.anchor_stride)

        proposals = proposal_func.proposal(proposal_count=16, nms_thresh=0.7, anchors=anchors, \
                                           batch_size=20, config=config)([rpn_prob, rpn_bbox])
        if mode == "training":
            target_rois, target_class_ids, target_delta, target_bboxes = detection_target_fixed.DetectionTarget(
                config=config, \
                name="proposal_target")([proposals, input_class_ids, gt_bboxes])
            denomrlaize_rois = KL.Lambda(lambda x: 8.0 * x, name="denormalized_rois")(target_rois)
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(feature_map, denomrlaize_rois, 20, 21, 7, 4)

            loss_rpn_match = KL.Lambda(lambda x: rpn_class_loss(*x), name="loss_rpn_match")(
                [input_rpn_match, rpn_class])

            loss_rpn_bbox = KL.Lambda(lambda x: rpn_bbox_loss(*x), name="loss_rpn_bbox")(
                [input_rpn_bbox, input_rpn_match, rpn_bbox])

            bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="bbox_loss")(
                [target_delta, target_class_ids, mrcnn_bbox])
            class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graphV2(*x), name="mrcnn_class_loss")(
                [target_class_ids, mrcnn_class_logits, input_active_ids])

            if subnet == "rpn":

                model = Model(
                    [input_image, input_bboxes, input_class_ids, input_active_ids, input_rpn_match, input_rpn_bbox],
                    [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals, target_rois, denomrlaize_rois,
                     target_class_ids, target_delta, target_bboxes, \
                     loss_rpn_match, loss_rpn_bbox])
            elif subnet == "all":
                model = Model(
                    [input_image, input_bboxes, input_class_ids, input_active_ids, input_rpn_match, input_rpn_bbox],
                    [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals, target_rois, denomrlaize_rois,
                     target_class_ids, target_delta, target_bboxes, \
                     mrcnn_class_logits, mrcnn_class, mrcnn_bbox, loss_rpn_match, loss_rpn_bbox, bbox_loss, class_loss])

        if mode == "inference":
            denomrlaize_proposals = KL.Lambda(lambda x: 8.0 * x, name="denormalized_proposals")(proposals)
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(feature_map, denomrlaize_proposals, 20, 16, 7,
                                                                          4)
            detections = DetectionLayer()([proposals, mrcnn_class, mrcnn_bbox])

            model = Model([input_image], [detections])

        return model
Code example #20
    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        if self.verbose:
            for each in zip(self.inner_model.input_names, self.inner_model.inputs):
                print ('---> ', each)
        input_slices = {name: tf.split(x, self.gpu_count)
                        for name, x in zip(self.inner_model.input_names,
                                           self.inner_model.inputs)}

        output_names = self.inner_model.output_names
        outputs_all = []
        for i in range(len(self.inner_model.outputs)):
            outputs_all.append([])

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica
                    zipped_inputs = zip(self.inner_model.input_names,
                                        self.inner_model.inputs)
                    inputs = [
                        KL.Lambda(lambda s: input_slices[name][i],
                                  output_shape=lambda s: (None,) + s[1:])(tensor)
                        for name, tensor in zipped_inputs]
                    # Create the model replica and get the outputs
                    if self.verbose:
                        if i == 0:
                            print ('\ntower_{0} - i/p '.format(i))
                            for each in inputs:
                                print ('--->', each)
                    
                    outputs = self.inner_model(inputs)
                    if self.verbose:
                        if i == 0:
                            print ('\ntower_{0} - o/p '.format(i))
                            for each in outputs:
                                print ('--->', each)
                                
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # If outputs are numbers without dimensions, add a batch dim.
                def add_dim(tensor):
                    """Add a dimension to tensors that don't have any."""
                    if K.int_shape(tensor) == ():
                        return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
                    return tensor
                outputs = list(map(add_dim, outputs))
                
                verbose = 0
                if verbose:
                    print ('---------------->')
                    for each in outputs:
                        print (each)
                
                # Concatenate
                merged.append(KL.Concatenate(axis=0, name=name)(outputs))
        return merged
Code example #21
def build_rnn(input, config):
    ctx = 64
    down = KL.Conv2D(512, (3, 3),
                     padding="same",
                     activation="relu",
                     name='gcap_down_imagefeature')(input)

    reshaped_conv5_3_feats = KL.Lambda(
        lambda x: tf.reshape(x, [config.BATCH_SIZE, ctx, 512]))(down)
    conv_feats = reshaped_conv5_3_feats

    print("Building the RNN...")

    contexts = conv_feats
    reshaped_contexts = KL.Lambda(lambda x: tf.reshape(x, [-1, 512]))(contexts)
    temp1 = attend_1(reshaped_contexts)
    w_embedding = KL.Embedding(input_dim=5000,
                               output_dim=512,
                               name='gcap_embedding')

    # Setup the LSTM

    # Initialize the LSTM using the mean context
    # with tf.variable_scope("initialize"):
    context_mean = KL.Lambda(lambda x: tf.reduce_mean(x, axis=1))(conv_feats)
    initial_memory, initial_output = initialize(context_mean)
    initial_state = initial_memory, initial_output

    # Prepare to run
    predictions = []
    outputs = []
    current_inputs = []
    num_steps = 15
    last_output = initial_output
    last_memory = initial_memory
    last_word = KL.Lambda(lambda x: K.zeros([config.BATCH_SIZE], 'int32'))(
        input)
    last_state = last_output, last_memory
    alphas = []
    att_masks = []
    cross_entropies = []
    predictions_correct = []
    lstm = KL.LSTM(
        512,
        return_state=True,
        recurrent_activation='hard_sigmoid',
        name='gcap_lstm',
        unit_forget_bias=False)  # (last_output,initial_state = initial_state)

    # Generate the words one by one
    for idx in range(num_steps):
        # Attention mechanism
        # with tf.variable_scope("attend"):
        # alpha = attend(reshaped_contexts, last_output)

        # use 2 fc layers to attend

        temp2 = attend_2(last_output)

        temp2 = KL.Lambda(lambda x: tf.reshape(
            tf.tile(tf.expand_dims(x, 1), [1, ctx, 1]), [-1, 512]))(temp2)
        temp = KL.Add()([temp1, temp2])
        att_logits = attend_3(temp)
        att_logits = KL.Lambda(lambda x: tf.reshape(x, [-1, ctx]))(att_logits)
        alpha = KL.Softmax()(att_logits)
        alpha1 = KL.RepeatVector(512)(alpha)
        alpha1 = KL.Permute((2, 1))(alpha1)
        context = KL.Multiply()([contexts, alpha1])
        context = KL.Lambda(lambda x: tf.reduce_sum(x, axis=1))(context)
        alphas.append(alpha)
        word_embed = w_embedding(last_word)
        # Apply the LSTM
        # with tf.variable_scope("lstm"):

        current_input = KL.Concatenate(axis=-1)([context, word_embed])
        current_input = KL.Lambda(lambda x: tf.expand_dims(x, 1))(
            current_input)

        output, memory, cell_out = lstm(current_input,
                                        initial_state=list(last_state))  #
        state = memory, cell_out
        current_inputs.append(current_input)
        outputs.append(output)
        # Decode the expanded output of LSTM into a word
        # with tf.variable_scope("decode"):

        expanded_output = KL.Concatenate(axis=-1)(
            [output, context, word_embed])
        logits = decode(expanded_output)
        # probs = KL.Lambda(lambda x: tf.nn.softmax(logits))(logits)
        prediction = KL.Lambda(lambda x: tf.argmax(x, 1))(logits)
        predictions.append(prediction)

        last_output = output
        last_memory = memory
        last_state = state
        if idx == 0:
            att_mask = KL.Lambda(lambda x: K.switch(tf.equal(x[
                0], 0), tf.constant(0.0), tf.constant(1.0)))(last_word)
        else:
            att_mask = KL.Lambda(lambda x: K.switch(tf.equal(x[
                0], 2), tf.constant(0.0), tf.constant(1.0)))(last_word)
        att_masks.append(att_mask)
        last_word = KL.Lambda(lambda x: tf.cast(x, tf.int32))(prediction)  #

        # tf.get_variable_scope().reuse_variables()

        # Compute the final loss, if necessary

    outputs = KL.Lambda(
        lambda x: tf.reshape(x, [config.BATCH_SIZE, num_steps, 512]))(outputs)
    predictions = KL.Lambda(lambda x: tf.reshape(tf.cast(
        x, tf.float32), [config.BATCH_SIZE, num_steps, 1]))(predictions)
    att_masks = KL.Lambda(lambda x: tf.reshape(tf.cast(
        x, tf.float32), [num_steps, 1, 1, 1]))(att_masks)
    alphas = KL.Lambda(
        lambda x: tf.reshape(x, [config.BATCH_SIZE, num_steps, ctx]))(alphas)

    print("RNN built.")
    return outputs, predictions, alphas, att_masks
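attend_1, attend_2 and attend_3 are used above but not defined in this excerpt. Judging only from the shapes they must produce (an assumption, not the original code), they behave like two Dense projections into a shared 512-d space plus a Dense layer emitting one unnormalized attention logit per context position:

import keras.layers as KL

attend_1 = KL.Dense(512, name='gcap_attend_ctx')    # applied to the reshaped contexts [batch * ctx, 512]
attend_2 = KL.Dense(512, name='gcap_attend_state')  # applied to the previous LSTM output [batch, 512]
attend_3 = KL.Dense(1, name='gcap_attend_score')    # one logit per context vector, reshaped to [-1, ctx]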
Code example #22
File: HH_RNN-euler.py  Project: ytixu/DNNdumps
    def make_model(self):
        inputs = K_layer.Input(shape=(self.timesteps, self.input_dim))
        #sin_layer = K_layer.Lambda(lambda x: K.sin(x), output_shape=(self.timesteps, self.input_dim))
        #cos_layer = K_layer.Lambda(lambda x: K.cos(x), output_shape=(self.timesteps, self.input_dim))

        #decomposed = K_layer.concatenate([sin_layer(inputs), cos_layer(inputs)], axis=1)
        reshaped = K_layer.Reshape(
            (self.partial_n, self.partial_ts, self.input_dim))(inputs)
        encode_reshape = K_layer.Reshape((self.partial_n, self.latent_dim))
        encode_1 = RNN_UNIT(self.latent_dim)
        encode_2 = RNN_UNIT(self.latent_dim, return_sequences=True)

        def encode_partials(seq):
            encoded = [None] * self.partial_n
            for i in range(self.partial_n):
                rs = K_layer.Lambda(lambda x: x[:, i],
                                    output_shape=(self.partial_ts,
                                                  self.input_dim))(seq)
                encoded[i] = encode_1(rs)
            return encode_reshape(K_layer.concatenate(encoded, axis=1))

        encoded = encode_partials(reshaped)
        print(K.int_shape(encoded), K.int_shape(reshaped))
        encoded = encode_2(encoded)

        z = K_layer.Input(shape=(self.latent_dim, ))
        decoder_activation = 'tanh'
        decode_emb = K_layer.Dense(self.latent_dim / 2,
                                   activation=decoder_activation)
        #decode_euler_1 = K_layer.Dense(self.latent_dim/4, activation=decoder_activation)
        decode_euler_2 = K_layer.Dense(self.output_dim,
                                       activation=decoder_activation)

        decode_repete = K_layer.RepeatVector(self.partial_n)
        decode_repete_part = K_layer.RepeatVector(self.partial_ts)
        decode_residual_emb = RNN_UNIT(self.latent_dim / 2,
                                       return_sequences=True,
                                       activation=decoder_activation)
        #decode_residual_euler_1 = RNN_UNIT(self.latent_dim/4, return_sequences=True, activation=decoder_activation)
        decode_residual_euler_2 = RNN_UNIT(self.output_dim,
                                           return_sequences=True,
                                           activation=decoder_activation)

        def decode_angle(e):
            emb = decode_emb(e)
            emb_residual = decode_repete(e)
            emb_residual = decode_residual_emb(emb_residual)
            emb = K_layer.add([decode_repete(emb), emb_residual])

            frames = [None] * self.timesteps
            for i in range(self.partial_n):
                e_ = K_layer.Lambda(lambda x: x[:, i],
                                    output_shape=(self.latent_dim / 2, ))(emb)
                frame = decode_euler_2(e_)
                for j in range(i * self.partial_ts, (i + 1) * self.partial_ts):
                    frames[j] = frame
            frames = K_layer.concatenate(frames, axis=1)
            frames = K_layer.Reshape((self.timesteps, self.output_dim))(frames)

            emb = K_layer.Lambda(
                lambda x: K.repeat_elements(x, self.partial_ts, axis=1),
                output_shape=(self.timesteps, self.latent_dim / 2))(emb)
            residual = decode_residual_euler_2(emb)
            frames = K_layer.Activation(decoder_activation)(K_layer.add(
                [frames, residual]))
            return frames

        angles = [None] * self.partial_n
        for i in range(self.partial_n):
            e = K_layer.Lambda(lambda x: x[:, i],
                               output_shape=(self.latent_dim, ))(encoded)
            angles[i] = decode_angle(e)

        decoded = K_layer.concatenate(angles, axis=1)
        decoded_ = decode_angle(z)

        self.encoder = Model(inputs, encoded)
        self.decoder = Model(z, decoded_)
        self.autoencoder = Model(inputs, decoded)
        opt = RMSprop(lr=L_RATE)

        def mse(yTrue, yPred):
            # 	yt = K.reshape(yTrue, (-1, self.timesteps, self.output_dim))
            #  	yp = K.reshape(yPred, (-1, self.timesteps, self.output_dim))
            a = yTrue
            b = yPred
            return tf.reduce_mean(
                tf.abs(tf.atan2(tf.sin(a - b), tf.cos(a - b))))

        #loss = K.square(K.sin(yTrue) - K.sin(yPred))
        #loss = loss + K.square(K.cos(yTrue) - K.cos(yPred))
        #loss = K.mean(K.sqrt(loss))
        #return loss

        self.autoencoder.compile(optimizer='Nadam', loss='mean_squared_error')

        self.autoencoder.summary()
        self.encoder.summary()
        self.decoder.summary()
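The custom mse defined above is not a plain squared error: atan2(sin(a - b), cos(a - b)) wraps the angular difference into [-pi, pi], so angles near +180° and -180° are treated as close rather than ~360° apart. A small illustration (standalone, not part of the model):

import numpy as np

a, b = np.deg2rad(179.0), np.deg2rad(-179.0)
naive = abs(a - b)                                        # ~6.25 rad (~358 degrees)
wrapped = abs(np.arctan2(np.sin(a - b), np.cos(a - b)))   # ~0.035 rad (~2 degrees)
print(np.rad2deg(naive), np.rad2deg(wrapped))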
Code example #23
File: mnist_swwae.py  Project: richardj2020/keras1
# Shape of input to train on (note that model is fully convolutional however)
input_shape = x_train.shape[1:]
# The final list of the size of axis=1 for all layers, including input
nfeats_all = [input_shape[0]] + nfeats

# First build the encoder, all the while keeping track of the 'where' masks
img_input = Input(shape=input_shape)

# We push the 'where' masks to the following list
wheres = [None] * nlayers
y = img_input
for i in range(nlayers):
    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
    y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
    wheres[i] = layers.Lambda(getwhere,
                              output_shape=lambda x: x[0])([y_prepool, y])

# Now build the decoder, and use the stored 'where' masks to place the features
for i in range(nlayers):
    ind = nlayers - 1 - i
    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
    y = layers.multiply([y, wheres[ind]])
    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)

# Use hard_sigmoid to clip the range of the reconstruction
y = Activation('hard_sigmoid')(y)

# Define the model and its mean squared error loss, and compile it with Adam
model = Model(img_input, y)
model.compile('adam', 'mse')
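getwhere is referenced above but not included in this excerpt. One common way to compute the 'where' switches (a sketch under that assumption, not necessarily the original code) uses the gradient of the pooled sum, which is 1 exactly at the positions that produced each max:

from keras import backend as K

def getwhere(x):
    """Mask marking which pre-pool positions produced the max after pooling."""
    y_prepool, y_postpool = x
    return K.gradients(K.sum(y_postpool), y_prepool)[0]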
Code example #24
def model_ContextSum(p, embedding_matrix, max_sent_len, n_out):
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len, ),
                                  dtype='int32',
                                  name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embedding_matrix.shape[1],
                         input_dim=embedding_matrix.shape[0],
                         input_length=max_sent_len,
                         weights=[embedding_matrix],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(
        MAX_EDGES_PER_GRAPH,
        max_sent_len,
    ),
                                  dtype='int8',
                                  name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=POSITION_VOCAB_SIZE,
                         input_length=max_sent_len,
                         mask_zero=True,
                         embeddings_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    for i in range(p["rnn1_layers"] - 1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)
    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    # Take the vector of the sentences with the target entity pair
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    for i in range(MAX_EDGES_PER_GRAPH):
        sentence_vector = layers.Lambda(
            lambda l: l[:, i], output_shape=(num_units, ))(sentence_matrix)
        if i == 0:
            context_vectors = layers.Lambda(
                lambda l: l[:, i + 1:],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(
                lambda l: l[:, :i],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        else:
            context_vectors = layers.Lambda(
                lambda l: K.concatenate([l[:, :i], l[:, i + 1:]], axis=1),
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        context_vector = GlobalSumPooling1D()(context_vectors)
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)
    edge_vectors = layers.Concatenate(1)(layers_to_concat)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(
        layers.Dense(n_out, activation="softmax",
                     name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers],
                         outputs=[main_output])
    model.compile(optimizer=p['optimizer'],
                  loss=masked_categorical_crossentropy,
                  metrics=['accuracy'])

    return model
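GlobalSumPooling1D is used above but is not a stock Keras layer and is not defined in this excerpt. A minimal sketch of such a layer (an assumption about its behavior) that sums features over the edge/time axis:

from keras import backend as K
from keras.layers import Layer

class GlobalSumPooling1D(Layer):
    """Sum over the temporal axis: (batch, time, features) -> (batch, features)."""
    def call(self, inputs, mask=None):
        return K.sum(inputs, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2])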
Code example #25
File: mask_rcnn.py  Project: wanxinjun/mask_rcnn_pro
    def build(self):

        # image shape
        h, w, c = self.image_shape[:]
        print("image_shape: {}".format(self.image_shape))

        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception(
                "Image size must be dividable by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling."
                "For example, use 256, 320, 384, 448, 512, ... etc. ")

            # Inputs
        input_image = kl.Input(shape=[None, None, c], name="input_image")
        input_image_meta = kl.Input(shape=[cfg.COMMON.IMAGE_META_SIZE],
                                    name="input_image_meta")

        # Training mode
        if self.train_flag:

            # RPN GT
            input_rpn_match = kl.Input(shape=[None, 1],
                                       name="input_rpn_match",
                                       dtype=tf.int32)
            input_rpn_bbox = kl.Input(shape=[None, 4],
                                      name="input_rpn_bbox",
                                      dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)
            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = kl.Input(shape=[None],
                                          name="input_gt_class_ids",
                                          dtype=tf.int32)

            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = kl.Input(shape=[None, 4],
                                      name="input_gt_boxes",
                                      dtype=tf.float32)

            # Normalize coordinates
            gt_boxes = kl.Lambda(lambda x: self.bbox_util.norm_boxes_graph(
                x,
                k.shape(input_image)[1:3]))(input_gt_boxes)

            # 3. GT Masks (zero padded)
            # [batch, height, width, MAX_GT_INSTANCES]
            if cfg.TRAIN.USE_MINI_MASK:
                min_h, min_w = cfg.TRAIN.MINI_MASK_SHAPE[:]
                input_gt_masks = kl.Input(shape=[min_h, min_w, None],
                                          name="input_gt_masks",
                                          dtype=bool)
            else:
                input_gt_masks = kl.Input(shape=[h, w, None],
                                          name="input_gt_masks",
                                          dtype=bool)
                pass

            # anchor
            anchors = self.anchor_utils.get_anchors(self.image_shape)

            # Duplicate across the batch dimension because Keras requires it
            # TODO: can this be optimized to avoid duplicating the anchors?
            anchors = np.broadcast_to(anchors,
                                      (self.batch_size, ) + anchors.shape)
            # A hack to get around Keras's bad support for constants
            anchors = kl.Lambda(lambda x: tf.Variable(anchors),
                                name="anchors")(input_image)

            pass

        else:
            # Anchors in normalized coordinates
            anchors = kl.Input(shape=[None, 4], name="input_anchors")

            # The inputs above are only needed for training; define them here as well so they are not undefined at inference time.
            input_rpn_match = None
            input_rpn_bbox = None
            input_gt_class_ids = None
            gt_boxes = None
            input_gt_boxes = None
            input_gt_masks = None
            pass

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Don't create the head (stage 5), so we pick the 4th item in the list.
        _, c2, c3, c4, c5 = backbone.resnet_graph(input_image,
                                                  self.backbone,
                                                  stage5=True)

        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in config
        p5 = kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c5p5')(c5)
        p4 = kl.Add(name="fpn_p4add")([
            kl.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(p5),
            kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c4p4')(c4)
        ])
        p3 = kl.Add(name="fpn_p3add")([
            kl.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(p4),
            kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c3p3')(c3)
        ])
        p2 = kl.Add(name="fpn_p2add")([
            kl.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(p3),
            kl.Conv2D(self.top_down_pyramid_size, (1, 1), name='fpn_c2p2')(c2)
        ])

        # Attach 3x3 conv to all P layers to get the final feature maps.
        p2 = kl.Conv2D(self.top_down_pyramid_size, (3, 3),
                       padding="SAME",
                       name="fpn_p2")(p2)
        p3 = kl.Conv2D(self.top_down_pyramid_size, (3, 3),
                       padding="SAME",
                       name="fpn_p3")(p3)
        p4 = kl.Conv2D(self.top_down_pyramid_size, (3, 3),
                       padding="SAME",
                       name="fpn_p4")(p4)
        p5 = kl.Conv2D(self.top_down_pyramid_size, (3, 3),
                       padding="SAME",
                       name="fpn_p5")(p5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        p6 = kl.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(p5)
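        # With pool_size=(1, 1) and strides=2, the max-pooling reduces to a
        # plain stride-2 subsampling of P5 (every other row/column is kept).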

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        # RPN Model
        rpn = common.build_rpn_model(self.rpn_anchor_stride,
                                     len(self.rpn_anchor_ratios),
                                     self.top_down_pyramid_size)

        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
            pass

        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [
            kl.Concatenate(axis=1, name=n)(list(o))
            for o, n in zip(outputs, output_names)
        ]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        proposal_count = cfg.TRAIN.POST_NMS_ROIS if self.train_flag else cfg.TEST.POST_NMS_ROIS

        rpn_rois = common.ProposalLayer(
            proposal_count=proposal_count,
            nms_threshold=self.rpn_nms_threshold,
            batch_size=self.batch_size,
            name="ROI")([rpn_class, rpn_bbox, anchors])

        fc_layer_size = cfg.COMMON.FPN_CLASS_FC_LAYERS_SIZE
        pool_size = cfg.COMMON.POOL_SIZE
        mask_pool_size = cfg.COMMON.MASK_POOL_SIZE
        train_or_freeze = cfg.COMMON.TRAIN_FLAG

        if self.train_flag:

            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
            active_class_ids = kl.Lambda(
                lambda x: self.image_utils.parse_image_meta_graph(x)[
                    "active_class_ids"])(input_image_meta)

            if not cfg.TRAIN.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = kl.Input(shape=[proposal_count, 4],
                                      name="input_roi",
                                      dtype=np.int32)
                # Normalize coordinates
                target_rois = kl.Lambda(
                    lambda x: self.bbox_util.norm_boxes_graph(
                        x,
                        k.shape(input_image)[1:3]))(input_rois)
            else:
                target_rois = rpn_rois
                input_rois = None

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox, target_mask = \
                common.DetectionTargetLayer(self.batch_size, name="proposal_targets")([
                    target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = common.fpn_classifier_graph(
                rois,
                mrcnn_feature_maps,
                input_image_meta,
                pool_size,
                self.class_num,
                train_flag=train_or_freeze,
                fc_layers_size=fc_layer_size)

            mrcnn_mask = common.build_fpn_mask_graph(
                rois,
                mrcnn_feature_maps,
                input_image_meta,
                mask_pool_size,
                self.class_num,
                train_flag=train_or_freeze)

            # TODO: clean up (use tf.identity if necessary)
            output_rois = kl.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = kl.Lambda(
                lambda x: common.rpn_class_loss_graph(*x),
                name="rpn_class_loss")([input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = kl.Lambda(
                lambda x: common.rpn_bbox_loss_graph(self.batch_size, *x),
                name="rpn_bbox_loss")(
                    [input_rpn_bbox, input_rpn_match, rpn_bbox])
            class_loss = kl.Lambda(lambda x: common.mrcnn_class_loss_graph(*x),
                                   name="mrcnn_class_loss")([
                                       target_class_ids, mrcnn_class_logits,
                                       active_class_ids
                                   ])
            bbox_loss = kl.Lambda(lambda x: common.mrcnn_bbox_loss_graph(*x),
                                  name="mrcnn_bbox_loss")([
                                      target_bbox, target_class_ids, mrcnn_bbox
                                  ])
            mask_loss = kl.Lambda(lambda x: common.mrcnn_mask_loss_graph(*x),
                                  name="mrcnn_mask_loss")([
                                      target_mask, target_class_ids, mrcnn_mask
                                  ])

            # Model
            inputs = [
                input_image, input_image_meta, input_rpn_match, input_rpn_bbox,
                input_gt_class_ids, input_gt_boxes, input_gt_masks
            ]

            if not cfg.TRAIN.USE_RPN_ROIS:
                inputs.append(input_rois)

            outputs = [
                rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits,
                mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois,
                rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss
            ]
            model = km.Model(inputs, outputs, name='mask_rcnn')
            pass
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = common.fpn_classifier_graph(
                rpn_rois,
                mrcnn_feature_maps,
                input_image_meta,
                pool_size,
                self.class_num,
                train_flag=train_or_freeze,
                fc_layers_size=fc_layer_size)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
            # normalized coordinates
            detections = common.DetectionLayer(self.batch_size,
                                               name="mrcnn_detection")([
                                                   rpn_rois, mrcnn_class,
                                                   mrcnn_bbox, input_image_meta
                                               ])

            # Create masks for detections
            detection_boxes = kl.Lambda(lambda x: x[..., :4])(detections)
            mrcnn_mask = common.build_fpn_mask_graph(
                detection_boxes,
                mrcnn_feature_maps,
                input_image_meta,
                mask_pool_size,
                self.class_num,
                train_flag=train_or_freeze)

            model = km.Model([input_image, input_image_meta, anchors], [
                detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois,
                rpn_class, rpn_bbox
            ],
                             name='mask_rcnn')
            pass
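            # Note: unlike the training branch, the anchors here are a model
            # input ("input_anchors"), so the caller is expected to broadcast
            # the pyramid anchors across the batch before calling predict().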

        # Add multi-GPU support.
        gpu_count = cfg.COMMON.GPU_COUNT
        if gpu_count > 1:
            from m_rcnn.parallel_model import ParallelModel
            model = ParallelModel(model, gpu_count)

        return model
        pass
def model_ContextWeighted(p, embedding_matrix, max_sent_len, n_out):
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len, ),
                                  dtype='int32',
                                  name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embedding_matrix.shape[1],
                         input_dim=embedding_matrix.shape[0],
                         input_length=max_sent_len,
                         weights=[embedding_matrix],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(
        MAX_EDGES_PER_GRAPH,
        max_sent_len,
    ),
                                  dtype='int8',
                                  name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=POSITION_VOCAB_SIZE,
                         input_length=max_sent_len,
                         mask_zero=True,
                         embeddings_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    x = layers.concatenate([word_embeddings, pos_embeddings])
    for i in range(p["rnn1_layers"] - 1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)
    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    ### Attention over ghosts ###
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    for i in range(MAX_EDGES_PER_GRAPH):
        # Compute a memory vector for the target entity pair
        sentence_vector = layers.Lambda(
            lambda l: l[:, i], output_shape=(num_units, ))(sentence_matrix)
        target_sentence_memory = layers.Dense(num_units,
                                              activation="linear",
                                              use_bias=False)(sentence_vector)
        if i == 0:
            context_vectors = layers.Lambda(
                lambda l: l[:, i + 1:],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(
                lambda l: l[:, :i],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        else:
            context_vectors = layers.Lambda(
                lambda l: K.concatenate([l[:, :i], l[:, i + 1:]], axis=1),
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        # Compute the score between each memory and the memory of the target entity pair
        sentence_scores = layers.Lambda(
            lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 2)),
            output_shape=(MAX_EDGES_PER_GRAPH, ))(
                [target_sentence_memory, context_vectors])
        sentence_scores = layers.Activation('softmax')(sentence_scores)

        # Compute the final vector by taking the weighted sum of context vectors and the target entity vector
        context_vector = layers.Lambda(
            lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 1)),
            output_shape=(num_units, ))([context_vectors, sentence_scores])
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)

    edge_vectors = layers.concatenate(layers_to_concat, axis=1)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(
        layers.Dense(n_out, activation="softmax",
                     name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers],
                         outputs=[main_output])
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer,
                  loss=masked_categorical_crossentropy,
                  metrics=['accuracy'])

    return model
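
A minimal usage sketch (the hyper-parameter values and the random embedding
matrix are illustrative assumptions, and the module's own globals such as
MAX_EDGES_PER_GRAPH and masked_categorical_crossentropy are assumed to be
defined as in the original project):

import numpy as np

params = {"dropout1": 0.3, "position_emb": 3, "rnn1_layers": 1,
          "units1": 128, "bidirectional": True}
embedding_matrix = np.random.rand(10000, 50).astype("float32")  # (vocab, dim)
model = model_ContextWeighted(params, embedding_matrix,
                              max_sent_len=36, n_out=7)
model.summary()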
コード例 #27
0
def add_dim(tensor):
    """Add a dimension to tensors that don't have any."""
    if K.int_shape(tensor) == ():
        return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
    return tensor
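
# A likely use for add_dim (an assumption, not stated in the snippet): when
# merging per-replica outputs of a multi-GPU model, scalar outputs such as
# losses are reshaped to (1, 1) so they can be concatenated like the others.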
コード例 #28
0
ファイル: yolov4.py プロジェクト: zhangyahui520/Keras-YOLOv4
def YOLOv4(inputs,
           num_classes,
           num_anchors,
           initial_filters=32,
           fast=False,
           anchors=None,
           conf_thresh=0.05,
           nms_thresh=0.45,
           keep_top_k=100,
           nms_top_k=100):
    i32 = initial_filters
    i64 = i32 * 2
    i128 = i32 * 4
    i256 = i32 * 8
    i512 = i32 * 16
    i1024 = i32 * 32

    if fast:
        # x = PreLayer()(inputs)
        x = inputs
    else:
        x = inputs

    # CSPDarknet53 backbone
    x = conv2d_unit(x, i32, 3, strides=1, padding='same')

    # ============================= s2 =============================
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)
    x = conv2d_unit(x, i64, 3, strides=2)
    s2 = conv2d_unit(x, i64, 1, strides=1)
    x = conv2d_unit(x, i64, 1, strides=1)
    x = stack_residual_block(x, i32, i64, n=1)
    x = conv2d_unit(x, i64, 1, strides=1)
    x = layers.Concatenate()([x, s2])
    s2 = conv2d_unit(x, i64, 1, strides=1)

    # ============================= s4 =============================
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(s2)
    x = conv2d_unit(x, i128, 3, strides=2)
    s4 = conv2d_unit(x, i64, 1, strides=1)
    x = conv2d_unit(x, i64, 1, strides=1)
    x = stack_residual_block(x, i64, i64, n=2)
    x = conv2d_unit(x, i64, 1, strides=1)
    x = layers.Concatenate()([x, s4])
    s4 = conv2d_unit(x, i128, 1, strides=1)

    # ============================= s8 =============================
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(s4)
    x = conv2d_unit(x, i256, 3, strides=2)
    s8 = conv2d_unit(x, i128, 1, strides=1)
    x = conv2d_unit(x, i128, 1, strides=1)
    x = stack_residual_block(x, i128, i128, n=8)
    x = conv2d_unit(x, i128, 1, strides=1)
    x = layers.Concatenate()([x, s8])
    s8 = conv2d_unit(x, i256, 1, strides=1)

    # ============================= s16 =============================
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(s8)
    x = conv2d_unit(x, i512, 3, strides=2)
    s16 = conv2d_unit(x, i256, 1, strides=1)
    x = conv2d_unit(x, i256, 1, strides=1)
    x = stack_residual_block(x, i256, i256, n=8)
    x = conv2d_unit(x, i256, 1, strides=1)
    x = layers.Concatenate()([x, s16])
    s16 = conv2d_unit(x, i512, 1, strides=1)

    # ============================= s32 =============================
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(s16)
    x = conv2d_unit(x, i1024, 3, strides=2)
    s32 = conv2d_unit(x, i512, 1, strides=1)
    x = conv2d_unit(x, i512, 1, strides=1)
    x = stack_residual_block(x, i512, i512, n=4)
    x = conv2d_unit(x, i512, 1, strides=1)
    x = layers.Concatenate()([x, s32])
    s32 = conv2d_unit(x, i1024, 1, strides=1)
    # end of the CSPDarknet53 backbone

    # FPN part
    x = conv2d_unit(s32, i512, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i1024, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i512, 1, strides=1, act='leaky')
    x = spp(x)
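    # spp() is the YOLOv4 spatial pyramid pooling block: typically three
    # parallel max-pools (5x5, 9x9, 13x13) concatenated with the input
    # feature map (see the project's spp helper).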

    x = conv2d_unit(x, i512, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i1024, 3, strides=1, padding='same', act='leaky')
    fpn_s32 = conv2d_unit(x, i512, 1, strides=1, act='leaky')

    # pan01
    x = conv2d_unit(fpn_s32, i256, 1, strides=1, act='leaky')
    x = layers.UpSampling2D(2)(x)
    s16 = conv2d_unit(s16, i256, 1, strides=1, act='leaky')
    x = layers.Concatenate()([s16, x])
    x = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i512, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i512, 3, strides=1, padding='same', act='leaky')
    fpn_s16 = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    # end of pan01

    # pan02
    x = conv2d_unit(fpn_s16, i128, 1, strides=1, act='leaky')
    x = layers.UpSampling2D(2)(x)
    s8 = conv2d_unit(s8, i128, 1, strides=1, act='leaky')
    x = layers.Concatenate()([s8, x])
    x = conv2d_unit(x, i128, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i256, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i128, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i256, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i128, 1, strides=1, act='leaky')
    # end of pan02

    # output_s, no concat() needed
    output_s = conv2d_unit(x, i256, 3, strides=1, padding='same', act='leaky')
    output_s = conv2d_unit(output_s,
                           num_anchors * (num_classes + 5),
                           1,
                           strides=1,
                           bn=0,
                           act=None)

    # output_m, concat() needed
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)
    x = conv2d_unit(x, i256, 3, strides=2, act='leaky')
    x = layers.Concatenate()([x, fpn_s16])
    x = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i512, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i512, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i256, 1, strides=1, act='leaky')
    output_m = conv2d_unit(x, i512, 3, strides=1, padding='same', act='leaky')
    output_m = conv2d_unit(output_m,
                           num_anchors * (num_classes + 5),
                           1,
                           strides=1,
                           bn=0,
                           act=None)

    # output_l, concat() needed
    x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)
    x = conv2d_unit(x, i512, 3, strides=2, act='leaky')
    x = layers.Concatenate()([x, fpn_s32])
    x = conv2d_unit(x, i512, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i1024, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i512, 1, strides=1, act='leaky')
    x = conv2d_unit(x, i1024, 3, strides=1, padding='same', act='leaky')
    x = conv2d_unit(x, i512, 1, strides=1, act='leaky')
    output_l = conv2d_unit(x, i1024, 3, strides=1, padding='same', act='leaky')
    output_l = conv2d_unit(output_l,
                           num_anchors * (num_classes + 5),
                           1,
                           strides=1,
                           bn=0,
                           act=None)

    # Post-processing implemented with tensor operations
    if fast:

        def output_layer(args):
            output_s, output_m, output_l = args

            # Decode the box coordinates first
            pred_xywh_s, pred_conf_s, pred_prob_s = decode(
                output_s, anchors[0], 8, num_classes)
            pred_xywh_m, pred_conf_m, pred_prob_m = decode(
                output_m, anchors[1], 16, num_classes)
            pred_xywh_l, pred_conf_l, pred_prob_l = decode(
                output_l, anchors[2], 32, num_classes)
            # Compute the scores
            pred_score_s = pred_conf_s * pred_prob_s
            pred_score_m = pred_conf_m * pred_prob_m
            pred_score_l = pred_conf_l * pred_prob_l
            # Collect the predicted boxes from all output layers, then run NMS
            all_pred_boxes = tf.concat([pred_xywh_s, pred_xywh_m, pred_xywh_l],
                                       axis=1)  # [batch_size, -1, 4]
            all_pred_scores = tf.concat(
                [pred_score_s, pred_score_m, pred_score_l],
                axis=1)  # [batch_size, -1, 80]

            # Use fast NMS
            output = fastnms(all_pred_boxes, all_pred_scores, conf_thresh,
                             nms_thresh, keep_top_k, nms_top_k)

            return output

        output = layers.Lambda(output_layer)([output_s, output_m, output_l])
        model_body = keras.models.Model(inputs=inputs, outputs=output)
    else:
        model_body = keras.models.Model(inputs=inputs,
                                        outputs=[output_l, output_m, output_s])
    return model_body
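
A minimal construction sketch (the input size and class/anchor counts are
illustrative; with the default fast=False the three raw heads are returned and
decoding/NMS happens outside the model; keras/layers refer to the module's own
imports):

inputs = layers.Input(shape=(608, 608, 3))
model_body = YOLOv4(inputs, num_classes=80, num_anchors=3)
model_body.summary()  # outputs: [output_l, output_m, output_s]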
コード例 #29
0
# %%

# Build our model


# We create a function to integrate the tensorflow model with a Keras model
# This requires explicitly casting the tensor to a string, because of a Keras quirk
def ElmoEmbedding(x):
    return elmo_model(tf.squeeze(tf.cast(x, tf.string)),
                      signature="default",
                      as_dict=True)["default"]


input_text = layers.Input(shape=(1, ), dtype=tf.string)
x = layers.Lambda(ElmoEmbedding, output_shape=(1024, ))(input_text)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(inputs=[input_text], outputs=x)

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

# %%
# Create datasets (Only take up to 150 words for memory)
train_text = train_df['sentence'].tolist()
train_text = [' '.join(t.split()[0:150]) for t in train_text]
train_text = np.array(train_text, dtype=object)[:, np.newaxis]
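
# A hedged next step (assumes a matching 0/1 label array, not shown above):
# train_labels = train_df['label'].values   # hypothetical label column
# model.fit(train_text, train_labels, epochs=1, batch_size=32)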
def test_lambda(self):
    x = Normal(loc=tf.zeros([100, 10, 5]), scale=tf.ones([100, 10, 5]))
    y = layers.Lambda(lambda x: x**2)(x)