# Assumed imports (a sketch: the real import block lives elsewhere in the
# source files). The code below targets TF 1.x with standalone Keras. The
# helpers convbn / convbn_3d (conv + batch-norm blocks) and the tfc module
# (MLPStack / ConvStack) are project-specific helpers defined elsewhere and
# are not reproduced here.
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import (GRU, LSTM, Activation, Conv2D, Conv3D, Dense,
                          Flatten, GlobalAveragePooling3D, Input, Lambda,
                          TimeDistributed, add, concatenate, multiply)
from keras.models import Model


def branch_attention(cost_volume_3d, cost_volume_h, cost_volume_v,
                     cost_volume_45, cost_volume_135):
    feature = 4 * 9
    k = 9
    label = 9

    # predict one sigmoid weight per branch (h, v, 45, 135) from the 3-D cost volume
    cost1 = convbn(cost_volume_3d, 6, 3, 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 4, 3, 1, 1)
    cost1 = Activation('sigmoid')(cost1)

    # broadcast each branch weight over the 9 views and all feature channels
    cost_h = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, :1], 1), 9, 1))(cost1)
    cost_h = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_h)
    cost_v = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 1:2], 1), 9, 1))(cost1)
    cost_v = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_v)
    cost_45 = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 2:3], 1), 9, 1))(cost1)
    cost_45 = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_45)
    cost_135 = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 3:4], 1), 9, 1))(cost1)
    cost_135 = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_135)

    return concatenate([
        multiply([cost_h, cost_volume_h]),
        multiply([cost_v, cost_volume_v]),
        multiply([cost_45, cost_volume_45]),
        multiply([cost_135, cost_volume_135])
    ], axis=4), cost1
def disparityregression(input):
    shape = K.shape(input)
    disparity_values = np.linspace(-4, 4, 9)
    x = K.constant(disparity_values, shape=[9])
    x = K.expand_dims(K.expand_dims(K.expand_dims(x, 0), 0), 0)
    x = tf.tile(x, [shape[0], shape[1], shape[2], 1])
    out = K.sum(multiply([input, x]), -1)
    return out
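# Illustrative numpy sketch (not part of the original model) of what
# disparityregression computes: a soft-argmin, i.e. the expectation of the 9
# candidate disparities in [-4, 4] under a per-pixel softmax. `probs` below is
# a made-up distribution used only for the demonstration.
def _demo_disparityregression_numpy():
    disparity_values = np.linspace(-4, 4, 9)   # [-4., -3., ..., 3., 4.]
    probs = np.zeros(9)
    probs[5], probs[6] = 0.25, 0.75            # mass split between d = 1 and d = 2
    return float(np.sum(probs * disparity_values))   # -> 1.75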
def channel_attention_free(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(81, 1, 1, 'same')(x)
    x = Activation('sigmoid')(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
def to_3d_135(cost_volume_135):
    feature = 4 * 9

    # squeeze-and-excitation style gating: 3 learned weights, mirrored to the
    # 9 views on the 135-degree line (4 views / centre view / 4 views)
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(feature // 2, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 1:2],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(
        y, (K.shape(y)[0], 1, 1, 1, 9)))(channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])

    # second, spatially varying attention computed from the re-weighted volume
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 1:2],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])

    # aggregate the attended cost volume down to a single matching-cost channel
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    cost3 = Lambda(lambda x: K.permute_dimensions(
        K.squeeze(x, -1), (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
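# Illustrative numpy sketch (not from the original file) of the 3 -> 9 broadcast
# used in to_3d_135: the first learned weight is shared by the 4 views on one
# side of the centre view of the 135-degree line, the second covers the centre
# view, and the third covers the remaining 4 views. `w` is a made-up attention
# vector used only for the demonstration.
def _demo_135_broadcast_numpy():
    w = np.array([0.2, 0.9, 0.4])
    per_view = np.concatenate([np.repeat(w[0], 4), w[1:2], np.repeat(w[2], 4)])
    return per_view   # -> [0.2 0.2 0.2 0.2 0.9 0.4 0.4 0.4 0.4]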
def channel_attention_mirror(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(25, 1, 1, 'same')(x)
    x = Activation('sigmoid')(x)
    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
def spatial_attention(cost_volume):
    feature = 4 * 9
    k = 9
    label = 9

    # collapse the cost volume to a single-channel 2-D map
    dres0 = convbn_3d(cost_volume, feature // 2, 3, 1)
    dres0 = Activation('relu')(dres0)
    dres0 = convbn_3d(dres0, 1, 3, 1)
    cost0 = Activation('relu')(dres0)
    cost0 = Lambda(lambda x: K.permute_dimensions(
        K.squeeze(x, -1), (0, 2, 3, 1)))(cost0)

    # two factorized k x k branches: (1,k) then (k,1), and (k,1) then (1,k)
    cost1 = convbn(cost0, label // 2, (1, k), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 1, (k, 1), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost2 = convbn(cost0, label // 2, (k, 1), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost2 = convbn(cost2, 1, (1, k), 1, 1)
    cost2 = Activation('relu')(cost2)

    # sigmoid spatial map, broadcast over the 9 views and all feature channels
    cost = add([cost1, cost2])
    cost = Activation('sigmoid')(cost)
    cost = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, 1), 9, 1))(cost)
    cost = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost)
    return multiply([cost, cost_volume])
def atari_qnet(input_shape, num_actions, net_name, net_size):
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # convolutional layers
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    # if recurrent net then change input shape
    if 'drqn' in net_name:  # recurrent net (drqn)
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)
        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
    elif 'dqn' in net_name:  # fully connected net (dqn)
        # extract features with convolutional layers
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)

    # network type. Dense for dqn; LSTM or GRU for drqn
    if 'lstm' in net_name:
        net_type = LSTM
    elif 'gru' in net_name:
        net_type = GRU
    else:
        net_type = Dense

    # dueling or regular dqn/drqn
    if 'dueling' in net_name:
        value1 = net_type(net_size, activation='relu')(feature)
        adv1 = net_type(net_size, activation='relu')(feature)
        value2 = Dense(1)(value1)
        adv2 = Dense(num_actions)(adv1)
        mean_adv2 = Lambda(lambda x: K.mean(x, axis=1))(adv2)
        ones = K.ones([1, num_actions])
        lambda_exp = lambda x: K.dot(K.expand_dims(x, axis=1), -ones)
        exp_mean_adv2 = Lambda(lambda_exp)(mean_adv2)
        sum_adv = add([exp_mean_adv2, adv2])
        exp_value2 = Lambda(lambda x: K.dot(x, ones))(value2)
        q_value = add([exp_value2, sum_adv])
    else:
        hid = net_type(net_size, activation='relu')(feature)
        q_value = Dense(num_actions)(hid)

    # build model
    return Model(inputs=state, outputs=q_value)
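# Hedged usage sketch (not from the original file): builds a dueling DQN with
# typical Atari hyper-parameters. The 84x84x4 stacked-frame input, the 6-action
# space and the 512 hidden units are assumptions chosen for illustration.
def _demo_build_dueling_dqn():
    model = atari_qnet(input_shape=(84, 84, 4), num_actions=6,
                       net_name='dueling_dqn', net_size=512)
    model.summary()
    return model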
def channel_attention(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(15, 1, 1, 'same')(x)  # [B, 1, 1, 1, 15]
    x = Activation('sigmoid')(x)

    # 15 -> 25
    #  0  1  2  3  4
    #     5  6  7  8
    #        9 10 11
    #          12 13
    #             14
    #
    #  0  1  2  3  4
    #  1  5  6  7  8
    #  2  6  9 10 11
    #  3  7 10 12 13
    #  4  8 11 13 14
    x = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:5],
        y[:, :, :, :, 1:2], y[:, :, :, :, 5:9],
        y[:, :, :, :, 2:3], y[:, :, :, :, 6:7], y[:, :, :, :, 9:12],
        y[:, :, :, :, 3:4], y[:, :, :, :, 7:8], y[:, :, :, :, 10:11],
        y[:, :, :, :, 12:14],
        y[:, :, :, :, 4:5], y[:, :, :, :, 8:9], y[:, :, :, :, 11:12],
        y[:, :, :, :, 13:15]
    ], axis=-1))(x)
    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
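# Illustrative numpy sketch (not part of the original code) of the 15 -> 81
# expansion in channel_attention: 15 weights fill the upper triangle of a
# symmetric 5x5 grid (see the layout in the comments above), which is then
# REFLECT-padded to the full 9x9 angular grid, i.e. mirrored about the centre
# view. The weight values here are made up for the demonstration.
def _demo_symmetric_attention_expansion():
    w = np.arange(15, dtype=np.float32)
    grid = np.zeros((5, 5), dtype=np.float32)
    grid[np.triu_indices(5)] = w                        # row-major upper triangle
    grid = grid + grid.T - np.diag(np.diag(grid))       # symmetric 5x5
    full = np.pad(grid, ((0, 4), (0, 4)), mode='reflect')  # mirror to 9x9
    return full                                         # shape (9, 9)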
def _attention_model(self, a, h_prev, Ex_t):
    with tf.variable_scope(self.my_scope) as var_scope:
        with tf.name_scope(var_scope.original_name_scope):
            with tf.variable_scope('AttentionModel'):
                CONF = self.C
                B = self.ActualBatchSize
                L = CONF.L
                D = CONF.D
                h = h_prev
                n = self.output_size
                m = CONF.m
                self.assertOutputShape(h_prev)
                assert K.int_shape(a) == (B, L, D)
                assert K.int_shape(h_prev) == (B, n)
                assert K.int_shape(Ex_t) == (B, m)

                if (CONF.att_model == 'MLP_shared') or (CONF.att_model == '1x1_conv'):
                    """
                    Here we'll effectively create L MLP stacks, all sharing the same weights.
                    Each stack receives a concatenated vector of a(l) and h as input.
                    """
                    # h.shape = (B,n). Expand it to (B,1,n) and then broadcast to (B,L,n) in order
                    # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                    h = tf.identity(K.tile(K.expand_dims(h, axis=1), (1, L, 1)),
                                    name='h_t-1')
                    a = tf.identity(a, name='a')
                    if CONF.feed_clock_to_att:
                        assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                        # Ex_t.shape = (B,m). Expand it to (B,1,m) and then broadcast to (B,L,m) in
                        # order to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                        x = tf.identity(K.tile(K.expand_dims(Ex_t, axis=1), (1, L, 1)),
                                        name='Ex_t')
                        # Concatenate a, h and x. Final shape = (B, L, D+n+m)
                        att_inp = tf.concat([a, h, x], -1, name='ai_h_x')  # (B, L, D+n+m)
                        assert K.int_shape(att_inp) == (B, L, D + n + m)
                    else:
                        # Concatenate a and h. Final shape = (B, L, D+n)
                        att_inp = tf.concat([a, h], -1, name='ai_h')  # (B, L, D+n)
                        assert K.int_shape(att_inp) == (B, L, D + n)

                    if CONF.att_model == 'MLP_shared':
                        ## For #layers > 1 this implementation will end up being different from the
                        ## paper's implementation because they only
                        ## Below is how it is implemented in the code released by the authors of the paper:
                        ##     for i in range(1, CONF.att_a_layers+1):
                        ##         if not last_layer:
                        ##             a = Dense(CONF['att_a_%d_n'%(i,)], activation=tanh)(a)
                        ##         else:  # last-layer
                        ##             a = AffineTransform(CONF['att_a_%d_n'%(i,)])(a)
                        ##     h = AffineTransform(CONF['att_h_%d_n'%(i,)])(h)
                        ##     ah = a + K.expand_dims(h, axis=1)
                        ##     ah = tanh(ah)
                        ##     alpha = Dense(softmax_layer_params, activation=softmax)(ah)
                        alpha_1_ = tfc.MLPStack(CONF.att_layers)(att_inp)  # (B, L, 1)
                        assert K.int_shape(alpha_1_) == (B, L, 1)
                        alpha_ = K.squeeze(alpha_1_, axis=2)  # output shape = (B, L)
                        assert K.int_shape(alpha_) == (B, L)
                    elif CONF.att_model == '1x1_conv':
                        """
                        NOTE: The above model ('MLP_shared') amounts to a 1x1 convolution on the
                        Lx1 shaped (L=H.W) convnet features with num_channels=D, i.e. an input
                        shape of (H,W,C) or (1,L,D), using 'dimctx' kernels of size (1,1) and
                        stride=1, resulting in an output shape of (1,L,dimctx) [or (B, L, 1, dimctx)
                        with the batch dimension included]. This option provides such a convnet
                        layer implementation (which turns out not to be faster than MLP_shared).
                        """
                        att_inp = tf.expand_dims(att_inp, axis=1)
                        alpha_1_ = tfc.ConvStack(CONF.att_layers,
                                                 (B, 1, L, D + self.output_size))(att_inp)
                        assert K.int_shape(alpha_1_) == (B, 1, L, 1)
                        alpha_ = tf.squeeze(alpha_1_, axis=[1, 3])  # (B, L)
                        assert K.int_shape(alpha_) == (B, L)

                elif CONF.att_model == 'MLP_full':  # MLP: weights not shared across L
                    ## Concatenate a and h_prev and pass them through an MLP. This differs from the
                    ## theano implementation of the paper because we flatten a from (B,L,D) to
                    ## (B,L*D). Hence each element of the L*D vector receives its own weight,
                    ## because the effective weight matrix here has shape (L*D, num_dense_units)
                    ## as compared to (D, num_dense_units) in the shared-weights case.
                    ## Concatenate a and h. Final shape will be (B, L*D+n)
                    with tf.variable_scope('a_h'):
                        a_ = K.batch_flatten(a)  # (B, L*D)
                        a_.set_shape((B, L * D))  # Flatten loses shape info
                        if CONF.build_scanning_RNN and CONF.feed_clock_to_att:
                            assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                            att_inp = tf.concat([a_, h, Ex_t], -1, name="a_h_x")  # (B, L*D + n + m)
                            assert K.int_shape(att_inp) == (B, L * D + self.output_size + m), \
                                'shape %s != %s' % (K.int_shape(att_inp),
                                                    (B, L * D + self.output_size + m))
                        else:
                            att_inp = tf.concat([a_, h], -1, name="a_h")  # (B, L*D + n)
                            assert K.int_shape(att_inp) == (B, L * D + self.output_size), \
                                'shape %s != %s' % (K.int_shape(att_inp),
                                                    (B, L * D + self.output_size))
                    alpha_ = tfc.MLPStack(CONF.att_layers)(att_inp)  # (B, L)
                    assert K.int_shape(alpha_) == (B, L)
                else:
                    raise AttributeError('Invalid value of att_model param: %s' % CONF.att_model)

                ## Softmax
                alpha = tf.identity(tf.nn.softmax(alpha_), name='alpha')
                assert K.int_shape(alpha) == (B, L)

                ## Attention Modulator: Beta
                if CONF.build_att_modulator:
                    beta = tfc.MLPStack(CONF.att_modulator,
                                        self.batch_output_shape)(h_prev)
                    beta = tf.identity(beta, name='beta')
                else:
                    beta = tf.constant(1., shape=(B, 1), dtype=CONF.dtype)
                assert K.int_shape(beta) == (B, 1)

                return alpha, beta
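# Minimal numpy sketch (shapes and weights are made up, not from the original
# code) of the 'MLP_shared' scoring above: every one of the L feature vectors
# a_l is concatenated with the decoder state h and scored by the same shared
# weights W, followed by a softmax over the L locations.
def _demo_shared_mlp_attention():
    B, L, D, n = 2, 6, 4, 3
    a = np.random.rand(B, L, D)
    h = np.random.rand(B, n)
    W = np.random.rand(D + n, 1)                              # shared scoring weights
    att_inp = np.concatenate(
        [a, np.repeat(h[:, None, :], L, axis=1)], axis=-1)    # (B, L, D+n)
    scores = (att_inp @ W)[..., 0]                            # (B, L)
    alpha = np.exp(scores - scores.max(axis=1, keepdims=True))
    alpha /= alpha.sum(axis=1, keepdims=True)                 # softmax over L
    return alpha                                              # rows sum to 1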