def fpn_classifier_graph(rois, feature_maps, image_shape, pool_size, num_classes):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_shape: [height, width, depth]
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results

    Returns:
        logits: [N, NUM_CLASSES] classifier logits (before softmax)
        probs: [N, NUM_CLASSES] classifier probabilities
        bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes
    """
    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
    x = PyramidROIAlign([pool_size, pool_size], image_shape,
                        name="roi_align_classifier")([rois] + feature_maps)
    print(x.get_shape())

    # Two 1024 FC layers (implemented with Conv2D for consistency)
    x = TimeDistributed(Conv2D(1024, (pool_size, pool_size), padding="valid"),
                        name="mrcnn_class_conv1")(x)
    x = TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn1')(x)
    x = Activation('relu')(x)
    x = TimeDistributed(Conv2D(1024, (1, 1)), name="mrcnn_class_conv2")(x)
    x = TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn2')(x)
    x = Activation('relu')(x)

    # x is [batch, num_rois, 1, 1, 1024]; drop the two singleton spatial axes
    shared = Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                    name="pool_squeeze")(x)
    print(shared.get_shape())

    # Classifier head
    mrcnn_class_logits = TimeDistributed(Dense(num_classes),
                                         name='mrcnn_class_logits')(shared)
    mrcnn_probs = TimeDistributed(Activation("softmax"),
                                  name="mrcnn_class")(mrcnn_class_logits)
    print(mrcnn_class_logits.get_shape())
    print(mrcnn_probs.get_shape())

    # BBox head
    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
    x = TimeDistributed(Dense(num_classes * 4, activation='linear'),
                        name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    s = K.int_shape(x)  # Reshape needs static Python ints, not a tf.shape() tensor
    mrcnn_bbox = Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
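# --------------------------------------------------------------------------
# Standalone sketch (not part of the function above): illustrates why the
# pool_size x pool_size "valid" Conv2D wrapped in TimeDistributed behaves like
# a per-ROI fully connected layer, and why the two squeezes yield
# [batch, rois, 1024]. The concrete numbers (7x7 pooling, 256 channels,
# 100 ROIs) are assumptions chosen only for illustration.
import keras.backend as K
from keras.layers import Input, TimeDistributed, Conv2D, Lambda
from keras.models import Model

pool_size, channels, num_rois = 7, 256, 100

pooled = Input(shape=(num_rois, pool_size, pool_size, channels))  # [batch, rois, 7, 7, 256]
fc = TimeDistributed(Conv2D(1024, (pool_size, pool_size), padding="valid"))(pooled)
# A 7x7 "valid" convolution over a 7x7 input leaves one spatial position,
# i.e. a single 1024-vector per ROI: [batch, rois, 1, 1, 1024]
squeezed = Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2))(fc)    # [batch, rois, 1024]

print(Model(pooled, squeezed).output_shape)  # (None, 100, 1024)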
if t > 0:
    # decoder_target_data will be ahead by one timestep
    # and will not include the start character.
    decoder_target_data[i, t - 1, target_token_index[char]] = 1.

encoder_inputs = Input(shape=(None, num_encoder_tokens))
print(encoder_inputs.get_shape())  # (?, ?, 80)

activations = LSTM(64, return_sequences=True)(encoder_inputs)

# Attention: score each timestep, normalize the scores, then broadcast the
# weights across the 64 LSTM units so they align with the activations.
attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(64)(attention)
attention = Permute([2, 1])(attention)
print(activations.get_shape())  # (?, ?, 64)
print(attention.get_shape())    # (?, ?, 64)

# Weight the activations and sum over the time axis -> one 64-dim vector per sample
sent_representation = multiply([attention, activations])
sent_representation = Lambda(lambda x_train: K.sum(x_train, axis=1),
                             output_shape=(64,))(sent_representation)
print(sent_representation.get_shape())  # (?, 64)

# A single output unit needs a sigmoid; softmax over one unit is always 1.0
probabilities = Dense(1, activation='sigmoid')(sent_representation)
model = Model(inputs=encoder_inputs, outputs=probabilities)
model.summary()

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
# return_state=True is required to unpack the hidden and cell states below
encoder = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
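# --------------------------------------------------------------------------
# Standalone sketch: shows what RepeatVector(64) followed by Permute([2, 1]) do
# to the per-timestep attention weights, and why the result lines up with the
# (batch, timesteps, 64) LSTM activations for the elementwise multiply above.
# The batch size (2) and timestep count (10) are illustrative assumptions.
import numpy as np
import keras.backend as K

timesteps, units = 10, 64
weights = K.constant(np.random.rand(2, timesteps))      # (batch, timesteps): one weight per step
repeated = K.repeat(weights, units)                     # (batch, 64, timesteps) -- RepeatVector(64)
aligned = K.permute_dimensions(repeated, (0, 2, 1))     # (batch, timesteps, 64) -- Permute([2, 1])
print(K.int_shape(aligned))  # (2, 10, 64), matching the activations' (batch, timesteps, units)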