def build_model(self):
    """Assemble the bilinear CNN classifier and return it as a Keras ``Model``.

    Three Conv2D + MaxPool2D blocks (the second followed by batch
    normalisation) produce one feature map that is used as both the
    "detector" and the "extractor" stream. The two streams are flattened
    spatially, combined by ``_outer_product``, passed through
    ``_signed_sqrt`` and ``_l2_normalise``, and classified by a softmax
    ``Dense`` layer with ``N_CLASSES`` units. The tensor shape is printed
    after each step.

    Returns:
        The ``Model`` mapping the image input to the class probabilities.
    """

    def _show(label, tensor):
        # Print the static shape of ``tensor`` under ``label``.
        print(label, tensor.get_shape().as_list())

    print('\n----------BUILD MODEL----------\n')
    inputs = Input(shape=(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNEL))

    # (banner, conv filters, append batch normalisation?)
    conv_blocks = (
        ('\n---Block 1---', 32, False),
        ('\n---Block 2---', 64, True),
        ('\n---Block 3---', 64, False),
    )
    x = inputs
    for banner, n_filters, with_batch_norm in conv_blocks:
        print(banner)
        x = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
                   activation='relu')(x)
        _show('Conv2D:', x)
        x = MaxPool2D(pool_size=(2, 2), strides=2)(x)
        _show('MaxPool2D:', x)
        if with_batch_norm:
            x = BatchNormalization()(x)

    # ----------Merge 2 CNNs----------
    print('\n---Merge 2 CNNs---')
    # Both streams share the same feature map.
    detector = extractor = x
    detector_shape = detector.get_shape().as_list()
    extractor_shape = extractor.get_shape().as_list()

    # Collapse the two spatial axes into one: [batch, H * W, channels].
    detector = Reshape(
        [detector_shape[1] * detector_shape[2], detector_shape[3]])(detector)
    _show('Detector:', detector)
    extractor = Reshape(
        [extractor_shape[1] * extractor_shape[2], extractor_shape[3]])(extractor)
    _show('Extractor:', extractor)

    bcnn = Lambda(_outer_product)([detector, extractor])
    _show('Outer product:', bcnn)

    bcnn = Reshape([detector_shape[3] * extractor_shape[3]])(bcnn)
    _show('Reshape:', bcnn)

    bcnn = Lambda(_signed_sqrt)(bcnn)
    _show('Signed square root:', bcnn)

    bcnn = Lambda(_l2_normalise)(bcnn)
    _show('L2 normalisation:', bcnn)

    # ----------Fully Connected----------
    bcnn = Dense(units=N_CLASSES, activation='softmax')(bcnn)
    _show('Softmax:', bcnn)

    return Model(inputs=[inputs], outputs=[bcnn])
def fpn_classifier_graph(rois, feature_maps, image_shape, pool_size, num_classes):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_shape: [height, width, depth]
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results

    Returns:
        logits: [N, NUM_CLASSES] classifier logits (before softmax)
        probs: [N, NUM_CLASSES] classifier probabilities
        bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to
                     proposal boxes
    """
    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
    x = PyramidROIAlign([pool_size, pool_size], image_shape,
                        name="roi_align_classifier")([rois] + feature_maps)
    print(x.get_shape())

    # Two 1024 FC layers (implemented with Conv2D for consistency)
    x = TimeDistributed(Conv2D(1024, (pool_size, pool_size), padding="valid"),
                        name="mrcnn_class_conv1")(x)
    x = TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn1')(x)
    x = Activation('relu')(x)
    x = TimeDistributed(Conv2D(1024, (1, 1)), name="mrcnn_class_conv2")(x)
    x = TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn2')(x)
    x = Activation('relu')(x)

    # NOTE(review): squeezing axes 4 and 3 requires those axes to have size 1.
    # The shape comment above describes a channels-last layout, in which the
    # size-1 spatial axes would be 2 and 3 -- confirm the data format in use.
    shared = Lambda(lambda x: K.squeeze(K.squeeze(x, 4), 3),
                    name="pool_squeeze")(x)
    print(shared.get_shape())

    # Classifier head
    mrcnn_class_logits = TimeDistributed(Dense(num_classes),
                                         name='mrcnn_class_logits')(shared)
    mrcnn_probs = TimeDistributed(Activation("softmax"),
                                  name="mrcnn_class")(mrcnn_class_logits)
    print(mrcnn_class_logits.get_shape())
    print(mrcnn_probs.get_shape())

    # BBox head
    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
    x = TimeDistributed(Dense(num_classes * 4, activation='linear'),
                        name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    # Reshape needs static Python dimensions, so read the static shape rather
    # than the symbolic tf.shape tensor. When the number of boxes is not known
    # statically, let Reshape infer that axis with -1.
    s = K.int_shape(x)
    num_boxes = s[1] if s[1] is not None else -1
    mrcnn_bbox = Reshape((num_boxes, num_classes, 4), name="mrcnn_bbox")(x)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def f(input_x):
    """Build the multi-resolution feature stack for ``input_x``.

    ``self`` and ``repetitions`` are taken from the enclosing scope. After
    the first layer, each of the ``repetitions`` stages adds one strided
    residual branch, merges all branches with
    ``self.multi_resolution_concat`` and passes every merged map through an
    identity ``Lambda``. ``self.filter_list`` is reset here and gains one
    entry (double the previous filter count) per stage.

    Returns:
        The list of identity-mapped feature maps from the last stage.
    """
    latest = self.first_layer(input_x, scope='first_layer')
    features = []
    filters = self.init_filters
    self.filter_list = [filters]

    # First Layer consumed one stage
    for i in range(repetitions):
        print('\nBuilding ... %d/%d' % (i, repetitions))

        # Get Downsample: the first stage reads the first-layer output,
        # later stages read the last feature map of the previous stage.
        scope = 'stage_%d' % (i + 1)
        downsample = self.residual_layer(filters,
                                         scope=scope,
                                         first_layer_stride=(2, 2),
                                         res_block=self.blocks)
        source = latest if i == 0 else features[-1]
        features.append(downsample(source))

        # Get concatenated feature maps
        out_maps = self.multi_resolution_concat(features, self.filter_list)

        features = []
        print('Identity Mapping:')
        # Pass each merged map through an identity layer. The declared output
        # shape is taken from the most recently produced tensor.
        for idx, (fm, _) in enumerate(zip(out_maps, self.filter_list)):
            latest = Lambda(lambda t: t,
                            output_shape=latest.get_shape().as_list())(fm)
            print(idx, latest)
            features.append(latest)

        filters *= 2
        self.filter_list.append(filters)

    return features
def channel_attention_m(x, residual=False, stream=False):
    """Gate ``x`` with attention weights computed from its global max pool.

    In both modes ``x`` is globally max-pooled, expanded back to three
    singleton axes, and passed through a 1x1x1 ``Conv3D`` with half as many
    filters as the last axis of ``x``, followed by ReLU.

    ``stream=False``: a second 1x1x1 ``Conv3D`` restores the full filter
    count, softmax is applied, and the result is multiplied with ``x``.
    A list input is first stacked along axis 4.

    ``stream=True``: a 1x1x1 ``Conv3D`` with 2 filters followed by sigmoid
    yields one gate per half of ``x``. ``x`` is split into two halves with
    the ``slicing`` helper, each half is multiplied by its own gate, and the
    two results are concatenated.

    Args:
        x: Input tensor, or a list of tensors when ``stream`` is False.
        residual: If True, add the ungated input back onto each gated output.
        stream: Selects the two-gate (per-half) variant described above.

    Returns:
        The gated tensor.
    """
    # dims: BxHxWxCxM (M streams)
    if not stream and isinstance(x, list):
        x = Lambda(lambda var: K.stack(var, axis=4))(x)

    # Shared prefix: pool, restore three singleton axes so the 1x1x1
    # convolutions apply, then the half-width bottleneck.
    y = GlobalMaxPooling3D()(x)
    y = Lambda(lambda var: K.expand_dims(
        K.expand_dims(K.expand_dims(var, axis=1), axis=2), axis=3))(y)
    y = Conv3D(filters=K.int_shape(x)[-1] // 2, kernel_size=1, strides=1)(y)
    y = Activation("relu")(y)

    if not stream:
        y = Conv3D(filters=K.int_shape(x)[-1], kernel_size=1, strides=1)(y)
        y = Activation("softmax")(y)
        y = Lambda(lambda var: tf.multiply(*var))([x, y])
        if residual:
            y = Add()([y, x])
        return y

    gates = Conv3D(filters=2, kernel_size=1, strides=1)(y)
    gates = Activation("sigmoid")(gates)

    half = x.get_shape().as_list()[-1] // 2
    gated_parts = []
    for i in range(2):
        # `slicing` is defined elsewhere; presumably it takes
        # [index:index_end] along the last axis -- confirm.
        x_sub = Lambda(slicing, arguments={
            'index': i * half,
            'index_end': (i + 1) * half
        })(x)
        # Always slice the gate tensor itself. Reusing one name for the gate
        # and the per-half product would make the second iteration slice the
        # first product instead of the gates.
        gate_sub = Lambda(slicing, arguments={
            'index': i,
            'index_end': i + 1
        })(gates)
        part = Lambda(lambda var: tf.multiply(*var))([x_sub, gate_sub])
        if residual:
            part = Add()([part, x_sub])
        gated_parts.append(part)

    return concatenate(gated_parts)
def build(self):
    """Create the ResNet50 backbone and expose its feature map and input.

    The backbone is built without its classification top, with ImageNet
    weights and a fixed 32x256x3 input. Its output is passed through
    ``self.squeeze_wrapper`` inside a ``Lambda`` layer and the resulting
    shape is logged at debug level.

    Returns:
        A ``(conv_outputs, input_tensor)`` tuple: the squeezed feature map
        and the first input tensor of the backbone.
    """
    backbone = ResNet50(include_top=False,
                        weights='imagenet',
                        input_shape=(32, 256, 3))

    # Shapes as noted by the original author:
    #   inputs       = [(-1, 32, 256, 3)]
    #   conv_outputs = [-1, 8, 2048]
    conv_outputs = Lambda(self.squeeze_wrapper)(backbone.output)
    logger.debug("Resnet50输出的feature map shape:%r",
                 conv_outputs.get_shape().as_list())

    return conv_outputs, backbone.inputs[0]
print(encoder_inputs.get_shape())  #(?, 80)

activations = LSTM(64, return_sequences=True)(encoder_inputs)

# Attention: score every timestep, normalise the scores with softmax, then
# repeat them across the 64 LSTM features so they can be multiplied
# element-wise with the activations.
attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(64)(attention)
attention = Permute([2, 1])(attention)

print(activations.get_shape())  #(?, ?, 64)
print(attention.get_shape())  #(?, ?, 64)

# The weighted activations must be bound to `sent_representation`: the next
# statement reads that name, which was otherwise never defined.
sent_representation = multiply([attention, activations])
# Summing over the time axis leaves one value per LSTM feature, so the
# declared output shape has to be the LSTM width (64).
sent_representation = Lambda(lambda x_train: K.sum(x_train, axis=1),
                             output_shape=(64, ))(sent_representation)
print(sent_representation.get_shape())  #(?, 64)

# NOTE(review): softmax over a single unit always outputs 1.0; a sigmoid (or
# more output units) is probably intended -- confirm against the labels.
probabilities = Dense(1, activation='softmax')(
    sent_representation)  #Expected (5000,)

model = Model(inputs=encoder_inputs, outputs=probabilities)
model.summary()

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
# return_state=True is required for the layer to return the hidden and cell
# states that are unpacked below.
encoder = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,