def spatial_attention(cost_volume):
    feature = 4 * 9
    k = 9
    label = 9
    dres0 = convbn_3d(cost_volume, feature // 2, 3, 1)
    dres0 = Activation('relu')(dres0)
    dres0 = convbn_3d(dres0, 1, 3, 1)
    cost0 = Activation('relu')(dres0)
    # (B, 9, H, W, 1) -> (B, H, W, 9): drop the channel axis, move disparity last
    cost0 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost0)
    # Two factorized convolution paths: 1xk followed by kx1, and kx1 followed by 1xk
    cost1 = convbn(cost0, label // 2, (1, k), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 1, (k, 1), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost2 = convbn(cost0, label // 2, (k, 1), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost2 = convbn(cost2, 1, (1, k), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost = add([cost1, cost2])
    cost = Activation('sigmoid')(cost)
    # Broadcast the (B, H, W, 1) attention map back to the (B, 9, H, W, 36) volume shape
    cost = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, 1), 9, 1))(cost)
    cost = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost)
    return multiply([cost, cost_volume])
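# --- Usage sketch (illustrative, not part of the original source) ---
# spatial_attention expects a 5-D cost volume shaped (batch, 9, H, W, 36):
# 9 disparity levels and 4*9 feature channels, matching the hard-coded repeat
# factors above. A minimal smoke test, assuming the Keras imports used
# throughout this file; the helper name is hypothetical.
def _demo_spatial_attention(sz=32):
    cv_in = Input(shape=(9, sz, sz, 36))   # (B, D, H, W, C) cost volume
    cv_out = spatial_attention(cv_in)      # same shape, spatially re-weighted
    return Model(inputs=cv_in, outputs=cv_out)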
def define_AttMLFNet(sz_input, sz_input2, view_n, learning_rate):
    """ 4 branches inputs """
    input_list = []
    for i in range(len(view_n) * 4):
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))
    """ 4 branches features """
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for i in range(len(view_n) * 4):
        feature_list.append(feature_extraction_layer(input_list[i]))
    # Split the 36 per-view feature maps into the four angular branches
    # (horizontal, vertical, 45-degree, 135-degree), 9 views each
    feature_h_list = feature_list[:9]
    feature_v_list = feature_list[9:18]
    feature_45_list = feature_list[18:27]
    feature_135_list = feature_list[27:]
    """ cost volume """
    cv_h = Lambda(_get_h_CostVolume_)(feature_h_list)
    cv_v = Lambda(_get_v_CostVolume_)(feature_v_list)
    cv_45 = Lambda(_get_45_CostVolume_)(feature_45_list)
    cv_135 = Lambda(_get_135_CostVolume_)(feature_135_list)
    """ intra branch """
    cv_h_3d, cv_h_ca = to_3d_h(cv_h)
    cv_v_3d, cv_v_ca = to_3d_v(cv_v)
    cv_45_3d, cv_45_ca = to_3d_45(cv_45)
    cv_135_3d, cv_135_ca = to_3d_135(cv_135)
    """ inter branch """
    cv, attention_4 = branch_attention(
        multiply([cv_h_3d, cv_v_3d, cv_45_3d, cv_135_3d]),
        cv_h_ca, cv_v_ca, cv_45_ca, cv_135_ca)
    """ cost volume regression """
    cost = basic(cv)
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    model = Model(inputs=input_list, outputs=[pred])
    model.summary()
    opt = Adam(lr=learning_rate)
    model.compile(optimizer=opt, loss='mae')
    return model
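# --- Usage sketch (illustrative, not part of the original source) ---
# AttMLFNet consumes 4 branches x 9 views = 36 grayscale inputs; `view_n` is
# assumed to be the list of 9 angular indices and the patch sizes are examples.
# The helper name is hypothetical.
def _demo_build_attmlfnet():
    model = define_AttMLFNet(sz_input=32, sz_input2=32,
                             view_n=list(range(9)), learning_rate=1e-3)
    # fit/predict expect a list of 36 arrays, each shaped (batch, 32, 32, 1)
    return model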
def to_3d_135(cost_volume_135):
    feature = 4 * 9
    # Channel attention: squeeze to a per-channel descriptor, then predict
    # 3 weights shared across the 9 views of the 135-degree branch
    channel_135 = GlobalAveragePooling3D(data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(feature // 2, 1, 1, 'same', data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same', data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    # Broadcast the 3 weights onto the 9 views in a 4-1-4 pattern
    channel_135 = Lambda(lambda y: K.concatenate(
        [y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
         y[:, :, :, :, 1:2],
         y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]],
        axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])
    # Second, spatially varying attention pass over the re-weighted volume
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same', data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same', data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate(
        [y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
         y[:, :, :, :, 1:2],
         y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]],
        axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])
    # Aggregate the re-weighted volume down to a single-channel cost;
    # each convbn_3d block takes the previous block's output
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
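# --- Illustration (not part of the original source) ---
# The two Lambda concatenations above share 3 learned weights across the 9
# views of the 135-degree line in a 4-1-4 pattern, then repeat each view
# weight over its 4 feature channels (assuming view-major channel ordering
# in the cost volume). A minimal numpy sketch of that broadcast:
def _demo_135_weight_broadcast():
    import numpy as np
    w = np.array([0.2, 0.9, 0.4])              # 3 learned weights (example values)
    per_view = w[[0, 0, 0, 0, 1, 2, 2, 2, 2]]  # 4-1-4 mapping onto the 9 views
    per_channel = np.repeat(per_view, 4)       # analogue of K.repeat_elements(y, 4, -1)
    return per_channel                         # shape (36,), one weight per channel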
def define_LFattNet(sz_input, sz_input2, view_n, learning_rate):
    """ 81 inputs """
    input_list = []
    for i in range(len(view_n) * len(view_n)):
        print('input ' + str(i))
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))
    """ 81 features """
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for i in range(len(view_n) * len(view_n)):
        print('feature ' + str(i))
        feature_list.append(feature_extraction_layer(input_list[i]))
    """ cost volume """
    cv = Lambda(_getCostVolume_)(feature_list)
    """ channel attention """
    cv, attention = channel_attention(cv)
    """ cost volume regression """
    cost = basic(cv)
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    # when training, use the line below
    # model = Model(inputs=input_list, outputs=[pred])
    # when evaluating, use the line below
    model = Model(inputs=input_list, outputs=[pred, attention])
    model.summary()

    opt = Adam(lr=learning_rate)
    model.compile(optimizer=opt, loss='mae')
    return model
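# --- Usage sketch (illustrative, not part of the original source) ---
# LFattNet consumes the full 9x9 = 81 sub-aperture views; `view_n` is assumed
# to be the list of 9 angular indices and the patch sizes are examples. The
# helper name is hypothetical.
def _demo_build_lfattnet():
    model = define_LFattNet(sz_input=32, sz_input2=32,
                            view_n=list(range(9)), learning_rate=1e-3)
    # In the evaluation configuration above, predict() returns [pred, attention];
    # each of the 81 inputs is shaped (batch, 32, 32, 1)
    return model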
def _attention_model(self, a, h_prev, Ex_t):
    with tf.variable_scope(self.my_scope) as var_scope:
        with tf.name_scope(var_scope.original_name_scope):
            with tf.variable_scope('AttentionModel'):
                CONF = self.C
                B = self.ActualBatchSize
                L = CONF.L
                D = CONF.D
                h = h_prev
                n = self.output_size
                m = CONF.m

                self.assertOutputShape(h_prev)
                assert K.int_shape(a) == (B, L, D)
                assert K.int_shape(h_prev) == (B, n)
                assert K.int_shape(Ex_t) == (B, m)

                if (CONF.att_model == 'MLP_shared') or (CONF.att_model == '1x1_conv'):
                    """
                    Here we'll effectively create L MLP stacks, all sharing the same weights.
                    Each stack receives a concatenated vector of a(l) and h as input.
                    """
                    # h.shape = (B,n). Expand it to (B,1,n) and then broadcast to (B,L,n) in order
                    # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                    h = tf.identity(K.tile(K.expand_dims(h, axis=1), (1, L, 1)), name='h_t-1')
                    a = tf.identity(a, name='a')
                    if CONF.feed_clock_to_att:
                        assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                        # Ex_t.shape = (B,m). Expand it to (B,1,m) and then broadcast to (B,L,m) in order
                        # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                        x = tf.identity(K.tile(K.expand_dims(Ex_t, axis=1), (1, L, 1)), name='Ex_t')
                        # Concatenate a, h and x. Final shape = (B, L, D+n+m)
                        att_inp = tf.concat([a, h, x], -1, name='ai_h_x')  # (B, L, D+n+m)
                        assert K.int_shape(att_inp) == (B, L, D + n + m)
                    else:
                        # Concatenate a and h. Final shape = (B, L, D+n)
                        att_inp = tf.concat([a, h], -1, name='ai_h')  # (B, L, D+n)
                        assert K.int_shape(att_inp) == (B, L, D + n)

                    if CONF.att_model == 'MLP_shared':
                        ## For #layers > 1 this implementation will end up differing from the paper's.
                        ## Below is how it is implemented in the code released by the authors of the paper:
                        ##     for i in range(1, CONF.att_a_layers+1):
                        ##         if not last_layer:
                        ##             a = Dense(CONF['att_a_%d_n'%(i,)], activation=tanh)(a)
                        ##         else:  # last layer
                        ##             a = AffineTransform(CONF['att_a_%d_n'%(i,)])(a)
                        ##     h = AffineTransform(CONF['att_h_%d_n'%(i,)])(h)
                        ##     ah = a + K.expand_dims(h, axis=1)
                        ##     ah = tanh(ah)
                        ##     alpha = Dense(softmax_layer_params, activation=softmax)(ah)
                        alpha_1_ = tfc.MLPStack(CONF.att_layers)(att_inp)  # (B, L, 1)
                        assert K.int_shape(alpha_1_) == (B, L, 1)
                        alpha_ = K.squeeze(alpha_1_, axis=2)  # output shape = (B, L)
                        assert K.int_shape(alpha_) == (B, L)
                    elif CONF.att_model == '1x1_conv':
                        """
                        NOTE: The above model ('MLP_shared') is tantamount to a 1x1 convolution on the
                        1xL-shaped (L=H.W) convnet features with num_channels=D, i.e. an input shape of
                        (H,W,D) or (1,L,D), using 'dimctx' kernels of size (1,1) with stride 1, which
                        yields an output shape of (1,L,dimctx) [or (B,1,L,dimctx) with the batch
                        dimension included]. This option provides such a convnet layer implementation
                        (which turns out not to be faster than MLP_shared).
                        """
                        att_inp = tf.expand_dims(att_inp, axis=1)
                        alpha_1_ = tfc.ConvStack(CONF.att_layers,
                                                 (B, 1, L, D + self.output_size))(att_inp)
                        assert K.int_shape(alpha_1_) == (B, 1, L, 1)
                        alpha_ = tf.squeeze(alpha_1_, axis=[1, 3])  # (B, L)
                        assert K.int_shape(alpha_) == (B, L)

                elif CONF.att_model == 'MLP_full':  # MLP: weights not shared across L
                    ## Concatenate a and h_prev and pass them through an MLP. This differs from the
                    ## theano implementation of the paper because we flatten a from (B,L,D) to
                    ## (B,L*D). Hence each element of the L*D vector receives its own weight, since
                    ## the effective weight matrix here has shape (L*D, num_dense_units) as compared
                    ## to (D, num_dense_units) in the shared-weights case.
                    ## Concatenate a and h. Final shape will be (B, L*D+n)
                    with tf.variable_scope('a_h'):
                        a_ = K.batch_flatten(a)  # (B, L*D)
                        a_.set_shape((B, L * D))  # Flatten loses shape info
                        if CONF.build_scanning_RNN and CONF.feed_clock_to_att:
                            att_inp = tf.concat([a_, h, Ex_t], -1, name="a_h_x")  # (B, L*D + n + m)
                            assert K.int_shape(att_inp) == (B, L * D + self.output_size + m), \
                                'shape %s != %s' % (K.int_shape(att_inp),
                                                    (B, L * D + self.output_size + m))
                        else:
                            att_inp = tf.concat([a_, h], -1, name="a_h")  # (B, L*D + n)
                            assert K.int_shape(att_inp) == (B, L * D + self.output_size), \
                                'shape %s != %s' % (K.int_shape(att_inp),
                                                    (B, L * D + self.output_size))
                    alpha_ = tfc.MLPStack(CONF.att_layers)(att_inp)  # (B, L)
                    assert K.int_shape(alpha_) == (B, L)
                else:
                    raise AttributeError('Invalid value of att_model param: %s' % CONF.att_model)

                ## Softmax
                alpha = tf.identity(tf.nn.softmax(alpha_), name='alpha')
                assert K.int_shape(alpha) == (B, L)

                ## Attention Modulator: Beta
                if CONF.build_att_modulator:
                    beta = tfc.MLPStack(CONF.att_modulator, self.batch_output_shape)(h_prev)
                    beta = tf.identity(beta, name='beta')
                else:
                    beta = tf.constant(1., shape=(B, 1), dtype=CONF.dtype)
                assert K.int_shape(beta) == (B, 1)

                return alpha, beta
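# --- Usage sketch (illustrative, not part of the original source) ---
# A typical caller combines the returned alpha (B, L) and beta (B, 1) into a
# context vector: an alpha-weighted sum over the L annotation vectors of
# a (B, L, D), scaled by beta. The helper name below is hypothetical.
def _demo_context_vector(a, alpha, beta):
    # a: (B, L, D), alpha: (B, L), beta: (B, 1) -> z_t: (B, D)
    z_t = beta * tf.reduce_sum(a * tf.expand_dims(alpha, axis=-1), axis=1)
    return z_t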