def layer1_multistream(input_dim1, input_dim2, input_dim3, filt_num, channelImage):
    """Build the multi-stream feature extractor as a Keras ``Sequential``.

    Three identical stages are stacked; each stage is
    Conv3D - ReLU - Conv3D - BatchNorm - ReLU, using 2x2x2 kernels with
    'valid' padding and ``channels_last`` layout.

    Args:
        input_dim1, input_dim2, input_dim3: the three leading dimensions of
            the input shape handed to the first convolution of each stage.
        filt_num: number of filters for every convolution (cast to ``int``).
        channelImage: size of the trailing channel axis of the input shape.

    Returns:
        The (un-compiled) ``Sequential`` model.
    """
    n_filters = int(filt_num)
    in_shape = (input_dim1, input_dim2, input_dim3, channelImage)

    model = Sequential()
    for stage in range(3):
        # Layer names carry the stage index so that they stay unique.
        stage_layers = (
            Conv3D(n_filters, (2, 2, 2),
                   input_shape=in_shape,
                   padding='valid',
                   name='S1_c1%d' % (stage),
                   data_format='channels_last'),
            Activation('relu', name='S1_relu1%d' % (stage)),
            Conv3D(n_filters, (2, 2, 2),
                   padding='valid',
                   name='S1_c2%d' % (stage),
                   data_format='channels_last'),
            BatchNormalization(axis=-1, name='S1_BN%d' % (stage)),
            Activation('relu', name='S1_relu2%d' % (stage)),
        )
        for stage_layer in stage_layers:
            model.add(stage_layer)

    return model
def channel_attention_free(cost_volume):
    """Re-weight a cost volume with 81 freely learned attention weights.

    The volume is globally average-pooled, passed through two 1x1x1
    convolutions (170 filters + ReLU, then 81 filters + sigmoid), and the 81
    resulting weights are each repeated 4 times along the last axis before
    being multiplied with ``cost_volume``.

    Args:
        cost_volume: 5-D Keras tensor.
            NOTE(review): presumably its last axis has 81 * 4 = 324 channels
            so that the multiply lines up -- confirm against the caller.

    Returns:
        Tuple ``(weighted_cost_volume, attention)`` where ``attention`` is
        reshaped to ``(batch, 1, 1, 1, 81)``.
    """
    def _to_1x1x1(t):
        # (batch, C) -> (batch, 1, 1, 1, C) so that Conv3D can be applied.
        t = K.expand_dims(t, 1)
        t = K.expand_dims(t, 1)
        return K.expand_dims(t, 1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    descriptor = Lambda(lambda t: _to_1x1x1(t))(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(descriptor)
    hidden = Activation('relu')(hidden)

    weights = Conv3D(81, 1, 1, 'same')(hidden)
    weights = Activation('sigmoid')(weights)

    attention = Lambda(
        lambda t: K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81)))(weights)
    # Each weight is repeated 4 times along the channel axis.
    per_channel = Lambda(lambda t: K.repeat_elements(t, 4, -1))(attention)

    return multiply([per_channel, cost_volume]), attention
def to_3d_135(cost_volume_135):
    """Apply two attention stages to a cost volume and regress a cost map.

    Stage 1 (channel attention): global average pooling followed by two
    1x1x1 convolutions yields 3 sigmoid weights, which are spread over 9
    slots (pattern ``0 0 0 0 1 2 2 2 2``), each repeated 4 times, and
    multiplied with the input.

    Stage 2 (position-wise attention): the stage-1 result goes through two
    1x1x1 convolutions, again yielding 3 sigmoid weights that are spread
    and repeated the same way and multiplied with the *original* input.

    The stage-2 product is finally passed through 3-D conv/BN layers down to
    a single channel, which is squeezed and permuted with ``(0, 2, 3, 1)``.

    Args:
        cost_volume_135: 5-D Keras tensor, ``channels_last``.
            NOTE(review): presumably the last axis has 9 * 4 = 36 channels
            so that the multiplies line up -- confirm against the caller.

    Returns:
        Tuple ``(cost3, cv_135_multi)``: the regressed cost tensor and the
        attention-weighted cost volume.
    """
    feature = 4 * 9
    # Filter counts must be integers. On Python 3 ``feature / 2`` is a float
    # (18.0), so use floor division; the values are the same (18 and 9).
    half_feature = feature // 2
    quarter_feature = feature // 4

    def _spread_3_to_9(y):
        # Expand 3 weights to 9 slots: w0 x4, w1 x1, w2 x4.
        w0 = y[:, :, :, :, 0:1]
        w1 = y[:, :, :, :, 1:2]
        w2 = y[:, :, :, :, 2:3]
        return K.concatenate([w0, w0, w0, w0, w1, w2, w2, w2, w2], axis=-1)

    # ---- Stage 1: channel attention from globally pooled statistics ----
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(half_feature, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(lambda y: _spread_3_to_9(y))(channel_135)
    channel_135 = Lambda(
        lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])

    # ---- Stage 2: attention computed from the stage-1 weighted volume ----
    cv_135_tmp = Conv3D(half_feature, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: _spread_3_to_9(y))(cv_135_tmp)
    attention_135 = Lambda(
        lambda y: K.repeat_elements(y, 4, -1))(attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])

    # ---- Regression head ----
    # NOTE(review): each of the four convbn_3d calls below takes
    # ``cv_135_multi`` as input instead of the previous ``dres3``, so only
    # the last one (feature // 4 filters) actually feeds the output. This
    # looks like the layers were meant to be chained, but changing it would
    # alter the architecture and break existing weights -- confirm first.
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, half_feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, half_feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, quarter_feature, 3, 1)
    dres3 = Activation('relu')(dres3)

    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    # Drop the single channel axis, then move axis 1 to the end.
    cost3 = Lambda(lambda x: K.permute_dimensions(
        K.squeeze(x, -1), (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
def channel_attention_mirror(cost_volume):
    """Re-weight a cost volume with a mirrored 9x9 attention map.

    Global average pooling followed by two 1x1x1 convolutions (170 filters +
    ReLU, then 25 filters + sigmoid) yields 25 weights. They are arranged as
    a 5x5 grid, reflect-padded by 4 on the trailing side of both grid axes
    to form a 9x9 grid, flattened to 81 weights, repeated 4 times each along
    the last axis and multiplied with ``cost_volume``.

    Args:
        cost_volume: 5-D Keras tensor.
            NOTE(review): presumably its last axis has 81 * 4 = 324 channels
            -- confirm against the caller.

    Returns:
        Tuple ``(weighted_cost_volume, attention)`` where ``attention`` is
        reshaped to ``(batch, 1, 1, 1, 81)``.
    """
    def _to_1x1x1(t):
        # (batch, C) -> (batch, 1, 1, 1, C) so that Conv3D can be applied.
        t = K.expand_dims(t, 1)
        t = K.expand_dims(t, 1)
        return K.expand_dims(t, 1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    descriptor = Lambda(lambda t: _to_1x1x1(t))(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(descriptor)
    hidden = Activation('relu')(hidden)

    quadrant = Conv3D(25, 1, 1, 'same')(hidden)
    quadrant = Activation('sigmoid')(quadrant)

    # 25 weights -> 5x5 grid -> mirrored out to 9x9.
    grid = Lambda(lambda t: K.reshape(t, (K.shape(t)[0], 5, 5)))(quadrant)
    grid = Lambda(
        lambda t: tf.pad(t, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(grid)

    attention = Lambda(
        lambda t: K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81)))(grid)
    per_channel = Lambda(lambda t: K.repeat_elements(t, 4, -1))(attention)

    return multiply([per_channel, cost_volume]), attention
def build_models(seq_len=12, num_classes=4, load_weights=False):
    """Create the frozen ResNet50 backbone and the DST-Net sequence head.

    Args:
        seq_len: number of time steps in the head's input sequence.
        num_classes: size of the final softmax layer.
        load_weights: when true, load ``model/dstnet.h5`` into the head.

    Returns:
        Tuple ``(resnet, dstnet)``: the backbone with every layer marked
        non-trainable, and the compiled Conv3D + Bi-LSTM classifier.
    """
    # --- Backbone: ResNet50 without the top, all layers frozen ---
    backbone = ResNet50(weights='imagenet', include_top=False)
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False
    backbone.load_weights('model/resnet.h5')

    # --- Head: 1x1 conv per frame -> Conv3D -> flatten per time step ---
    feature_seq = Input(shape=(seq_len, 7, 7, 2048))
    per_frame = TimeDistributed(
        Conv2D(128, 1, 1, activation='relu'))(feature_seq)
    spatio_temporal = Conv3D(64, 3, 1, 'SAME', activation='relu')(per_frame)
    flat_seq = Reshape(target_shape=(seq_len, 7 * 7 * 64))(spatio_temporal)

    # --- Two stacked bidirectional LSTMs; only the second collapses time ---
    recurrent = Bidirectional(
        LSTM(128, dropout=0.5, return_sequences=True))(flat_seq)
    recurrent = Bidirectional(
        LSTM(128, dropout=0.5, return_sequences=False))(recurrent)
    probabilities = Dense(num_classes, activation='softmax')(recurrent)

    head = Model(inputs=feature_seq, outputs=probabilities)
    head.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(lr=0.001, momentum=0.9, nesterov=True),
    )

    if load_weights:
        head.load_weights('model/dstnet.h5')

    return backbone, head
def convbn_3d(input, out_planes, kernel_size, stride):
    """Apply a bias-free 'same'-padded Conv3D followed by BatchNormalization.

    Args:
        input: Keras tensor to convolve (``channels_last`` layout).
        out_planes: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution stride.

    Returns:
        The batch-normalized convolution output tensor.
    """
    conv_layer = Conv3D(
        out_planes,
        kernel_size,
        stride,
        'same',
        data_format='channels_last',
        use_bias=False,  # no conv bias; BatchNormalization follows directly
    )
    convolved = conv_layer(input)
    normalized = BatchNormalization()(convolved)
    return normalized
def channel_attention(cost_volume):
    """Re-weight a cost volume with a symmetric, mirrored 9x9 attention map.

    Global average pooling followed by two 1x1x1 convolutions (170 filters +
    ReLU, then 15 filters + sigmoid) yields 15 weights. These are the upper
    triangle of a symmetric 5x5 grid:

        learned (15)          expanded (25)
        0  1  2  3  4         0  1  2  3  4
           5  6  7  8         1  5  6  7  8
              9 10 11   ->    2  6  9 10 11
                12 13         3  7 10 12 13
                   14         4  8 11 13 14

    The 5x5 grid is reflect-padded by 4 on the trailing side of both grid
    axes to form a 9x9 grid, flattened to 81 weights, repeated 4 times each
    along the last axis and multiplied with ``cost_volume``.

    Args:
        cost_volume: 5-D Keras tensor.
            NOTE(review): presumably its last axis has 81 * 4 = 324 channels
            -- confirm against the caller.

    Returns:
        Tuple ``(weighted_cost_volume, attention)`` where ``attention`` is
        reshaped to ``(batch, 1, 1, 1, 81)``.
    """
    # (start, stop) channel slices, concatenated in order, that lay the 15
    # learned weights out as the 25 entries of the symmetric grid.
    symmetric_slices = (
        (0, 5),                                  # row 0
        (1, 2), (5, 9),                          # row 1
        (2, 3), (6, 7), (9, 12),                 # row 2
        (3, 4), (7, 8), (10, 11), (12, 14),      # row 3
        (4, 5), (8, 9), (11, 12), (13, 15),      # row 4
    )

    def _to_1x1x1(t):
        # (batch, C) -> (batch, 1, 1, 1, C) so that Conv3D can be applied.
        t = K.expand_dims(t, 1)
        t = K.expand_dims(t, 1)
        return K.expand_dims(t, 1)

    def _expand_symmetric(t):
        pieces = [t[:, :, :, :, lo:hi] for lo, hi in symmetric_slices]
        return K.concatenate(pieces, axis=-1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    descriptor = Lambda(lambda t: _to_1x1x1(t))(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(descriptor)
    hidden = Activation('relu')(hidden)

    triangle = Conv3D(15, 1, 1, 'same')(hidden)  # [B, 1, 1, 1, 15]
    triangle = Activation('sigmoid')(triangle)

    grid = Lambda(lambda t: _expand_symmetric(t))(triangle)
    grid = Lambda(lambda t: K.reshape(t, (K.shape(t)[0], 5, 5)))(grid)
    grid = Lambda(
        lambda t: tf.pad(t, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(grid)

    attention = Lambda(
        lambda t: K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81)))(grid)
    per_channel = Lambda(lambda t: K.repeat_elements(t, 4, -1))(attention)

    return multiply([per_channel, cost_volume]), attention
def basic(cost_volume):
    """Regress a single-channel cost tensor with a small 3-D residual stack.

    Layout (all convolutions 3x3x3, stride 1, 150 filters unless noted):
      * stem: two conv-BN-ReLU units;
      * two residual blocks: conv-BN-ReLU, conv-BN, then add the block input;
      * classifier: conv-BN-ReLU followed by a bias-free 1-filter Conv3D.

    Args:
        cost_volume: 5-D Keras tensor (``channels_last``).

    Returns:
        The output tensor of the final single-filter Conv3D.
    """
    width = 2 * 75

    def _conv_bn_relu(tensor):
        activated = convbn_3d(tensor, width, 3, 1)
        activated = Activation('relu')(activated)
        return activated

    # Stem
    trunk = _conv_bn_relu(cost_volume)
    trunk = _conv_bn_relu(trunk)

    # Residual blocks (no activation after the skip addition)
    for _ in range(2):
        branch = _conv_bn_relu(trunk)
        branch = convbn_3d(branch, width, 3, 1)
        trunk = add([branch, trunk])

    # Classifier
    logits_in = _conv_bn_relu(trunk)
    head = Conv3D(1, 3, 1, 'same',
                  data_format='channels_last',
                  use_bias=False)
    return head(logits_in)