Example 1
def branch_attention(cost_volume_3d, cost_volume_h, cost_volume_v,
                     cost_volume_45, cost_volume_135):
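    # Predict a 4-channel sigmoid attention map from cost_volume_3d (one channel per
    # directional branch), broadcast each channel over the 9 angular positions and the
    # 36 feature channels, and use it to gate the corresponding directional cost volume.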
    feature = 4 * 9
    k = 9
    label = 9
    cost1 = convbn(cost_volume_3d, 6, 3, 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 4, 3, 1, 1)
    cost1 = Activation('sigmoid')(cost1)
    cost_h = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, :1], 1), 9, 1))(cost1)
    cost_h = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_h)
    cost_v = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 1:2], 1), 9, 1))(cost1)
    cost_v = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_v)
    cost_45 = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 2:3], 1), 9, 1))(cost1)
    cost_45 = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_45)
    cost_135 = Lambda(lambda y: K.repeat_elements(
        K.expand_dims(y[:, :, :, 3:4], 1), 9, 1))(cost1)
    cost_135 = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost_135)
    return concatenate([
        multiply([cost_h, cost_volume_h]),
        multiply([cost_v, cost_volume_v]),
        multiply([cost_45, cost_volume_45]),
        multiply([cost_135, cost_volume_135])
    ], axis=4), cost1
Example 2
def disparityregression(input):
    shape = K.shape(input)
    disparity_values = np.linspace(-4, 4, 9)
    x = K.constant(disparity_values, shape=[9])
    x = K.expand_dims(K.expand_dims(K.expand_dims(x, 0), 0), 0)
    x = tf.tile(x, [shape[0], shape[1], shape[2], 1])
    out = K.sum(multiply([input, x]), -1)
    return out
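The function above is effectively a soft-argmin readout: it treats the last axis as a probability distribution over 9 disparity hypotheses in [-4, 4] and returns the expected disparity per pixel. A minimal NumPy sketch of the same reduction (toy values, no Keras required):

import numpy as np

# candidate disparities, matching np.linspace(-4, 4, 9) in disparityregression()
disparity_values = np.linspace(-4, 4, 9)    # [-4, -3, ..., 3, 4]

# toy softmax output for a single pixel: mass split between disparities -1 and 0
probs = np.zeros((1, 1, 1, 9))
probs[0, 0, 0, 3] = 0.25                    # hypothesis -1
probs[0, 0, 0, 4] = 0.75                    # hypothesis  0

# expected disparity = sum_d p(d) * d over the last axis,
# which is what K.sum(multiply([input, x]), -1) computes
expected = np.sum(probs * disparity_values, axis=-1)
print(expected[0, 0, 0])                    # -0.25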
Example 3
def channel_attention_free(cost_volume):
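    # Squeeze-and-excitation style gating: global-average-pool the cost volume, predict
    # 81 weights with two 1x1x1 convolutions, repeat each weight over 4 consecutive
    # feature channels, and rescale the 324-channel volume with them.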
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(81, 1, 1, 'same')(x)
    x = Activation('sigmoid')(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
Example 4
def to_3d_135(cost_volume_135):
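    # Gate the 135-degree cost volume twice: first with a globally pooled 3-value channel
    # attention (expanded to 9 views by the concatenate pattern below), then with a
    # spatially varying attention computed from the gated volume; finally a 3-D conv
    # stack reduces the result to a (B, H, W, 9) cost slice.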
    feature = 4 * 9
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(feature // 2, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                 axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(
        channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                   axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(
        attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
Example 5
def channel_attention_mirror(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(25, 1, 1, 'same')(x)
    x = Activation('sigmoid')(x)
    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
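The "mirror" variant predicts only a 5x5 block of weights and reflects it into a full 9x9 grid with tf.pad(..., 'REFLECT'). The padding behaves as in this NumPy sketch (np.pad with mode='reflect' mirrors without repeating the edge value, like TF's 'REFLECT' mode):

import numpy as np

# stand-in for the reshaped (5, 5) sigmoid output of channel_attention_mirror
block = np.arange(25).reshape(5, 5)

# pad 4 rows at the bottom and 4 columns on the right by reflection
full = np.pad(block, ((0, 4), (0, 4)), mode='reflect')

print(full.shape)   # (9, 9)
print(full[4])      # [20 21 22 23 24 23 22 21 20] -- last row mirrored back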
Example 6
def spatial_attention(cost_volume):
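    # Collapse the cost volume to a (B, H, W, 9) map with a 3-D conv reduction, predict a
    # single-channel sigmoid map from it via two separable (1, k)/(k, 1) conv branches,
    # then broadcast that map over the 9 angular positions and 36 channels to reweight
    # the input volume.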
    feature = 4 * 9
    k = 9
    label = 9
    dres0 = convbn_3d(cost_volume, feature // 2, 3, 1)
    dres0 = Activation('relu')(dres0)
    dres0 = convbn_3d(dres0, 1, 3, 1)
    cost0 = Activation('relu')(dres0)

    cost0 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost0)

    cost1 = convbn(cost0, label // 2, (1, k), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 1, (k, 1), 1, 1)
    cost1 = Activation('relu')(cost1)

    cost2 = convbn(cost0, label // 2, (k, 1), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost2 = convbn(cost2, 1, (1, k), 1, 1)
    cost2 = Activation('relu')(cost2)

    cost = add([cost1, cost2])
    cost = Activation('sigmoid')(cost)

    cost = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, 1), 9, 1))(cost)
    cost = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost)
    return multiply([cost, cost_volume])
Example 7
def atari_qnet(input_shape, num_actions, net_name, net_size):
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # convolutional layers
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    # if recurrent net then change input shape
    if 'drqn' in net_name:
        # recurrent net (drqn)
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)

        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
    elif 'dqn' in net_name:
        # fully connected net (dqn)
        # extract features with convolutional layers
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)

    # network type. Dense for dqn; LSTM or GRU for drqn
    if 'lstm' in net_name:
        net_type = LSTM
    elif 'gru' in net_name:
        net_type = GRU
    else:
        net_type = Dense

    # dueling or regular dqn/drqn
    if 'dueling' in net_name:
        value1 = net_type(net_size, activation='relu')(feature)
        adv1 = net_type(net_size, activation='relu')(feature)
        value2 = Dense(1)(value1)
        adv2 = Dense(num_actions)(adv1)
        mean_adv2 = Lambda(lambda x: K.mean(x, axis=1))(adv2)
        ones = K.ones([1, num_actions])
        lambda_exp = lambda x: K.dot(K.expand_dims(x, axis=1), -ones)
        exp_mean_adv2 = Lambda(lambda_exp)(mean_adv2)
        sum_adv = add([exp_mean_adv2, adv2])
        exp_value2 = Lambda(lambda x: K.dot(x, ones))(value2)
        q_value = add([exp_value2, sum_adv])
    else:
        hid = net_type(net_size, activation='relu')(feature)
        q_value = Dense(num_actions)(hid)

    # build model
    return Model(inputs=state, outputs=q_value)
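In the dueling branch the ones/K.dot lambdas only broadcast the state value and the negated mean advantage across actions, so the output is the standard dueling aggregation Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A NumPy sketch with toy numbers:

import numpy as np

value = np.array([1.5])                  # V(s), output of the Dense(1) head
adv = np.array([2.0, 4.0, 6.0, 8.0])     # A(s, a), output of the Dense(num_actions) head

# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), as assembled by the Lambda/add layers above
q = value + adv - adv.mean()
print(q)                                 # [-1.5  0.5  2.5  4.5]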
Example 8
def channel_attention(cost_volume):
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(15, 1, 1, 'same')(x)  # [B, 1, 1, 1, 15]
    x = Activation('sigmoid')(x)

    # 15 -> 25
    # 0  1  2  3  4
    #    5  6  7  8
    #       9 10 11
    #         12 13
    #            14
    #
    # 0  1  2  3  4
    # 1  5  6  7  8
    # 2  6  9 10 11
    # 3  7 10 12 13
    # 4  8 11 13 14

    x = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:5], y[:, :, :, :, 1:2], y[:, :, :, :, 5:9],
        y[:, :, :, :, 2:3], y[:, :, :, :, 6:7], y[:, :, :, :, 9:12],
        y[:, :, :, :, 3:4], y[:, :, :, :, 7:8], y[:, :, :, :, 10:11],
        y[:, :, :, :, 12:14], y[:, :, :, :, 4:5], y[:, :, :, :, 8:9],
        y[:, :, :, :, 11:12], y[:, :, :, :, 13:15]
    ],
                                       axis=-1))(x)

    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
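The slicing pattern above rebuilds the symmetric 5x5 matrix drawn in the comment from the 15 predicted weights (its upper triangle), row by row; the result is then reflect-padded to 9x9 exactly as in channel_attention_mirror. A NumPy check of the index layout:

import numpy as np

w = np.arange(15)          # the 15 sigmoid outputs, labelled 0..14 as in the comment

# flattened row-by-row order produced by the K.concatenate slices above
idx = [0, 1, 2, 3, 4,
       1, 5, 6, 7, 8,
       2, 6, 9, 10, 11,
       3, 7, 10, 12, 13,
       4, 8, 11, 13, 14]
full = w[idx].reshape(5, 5)

print(np.array_equal(full, full.T))   # True -- the expanded 5x5 block is symmetric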
Example 9
    def _attention_model(self, a, h_prev, Ex_t):
        with tf.variable_scope(self.my_scope) as var_scope:
            with tf.name_scope(var_scope.original_name_scope):
                with tf.variable_scope('AttentionModel'):
                    CONF = self.C
                    B = self.ActualBatchSize
                    L = CONF.L
                    D = CONF.D
                    h = h_prev
                    n = self.output_size
                    m = CONF.m

                    self.assertOutputShape(h_prev)
                    assert K.int_shape(a) == (B, L, D)
                    assert K.int_shape(h_prev) == (B, n)
                    assert K.int_shape(Ex_t) == (B, m)

                    if CONF.att_model in ('MLP_shared', '1x1_conv'):
                        """
                        Here we'll effectively create L MLP stacks all sharing the same weights. Each
                        stack receives a concatenated vector of a(l) and h as input.
                        """
                        # h.shape = (B,n). Expand it to (B,1,n) and then broadcast to (B,L,n) in order
                        # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                        h = tf.identity(K.tile(K.expand_dims(h, axis=1),
                                               (1, L, 1)),
                                        name='h_t-1')
                        a = tf.identity(a, name='a')
                        if CONF.feed_clock_to_att:
                            assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                            # Ex_t.shape = (B,m). Expand it to (B,1,m) and then broadcast to (B,L,m) in order
                            # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                            x = tf.identity(K.tile(K.expand_dims(Ex_t, axis=1),
                                                   (1, L, 1)),
                                            name='Ex_t')
                            # Concatenate a, h and x. Final shape = (B, L, D+n+m)
                            att_inp = tf.concat([a, h, x], -1,
                                                name='ai_h_x')  # (B, L, D+n+m)
                            assert K.int_shape(att_inp) == (B, L, D + n + m)
                        else:
                            # Concatenate a and h. Final shape = (B, L, D+n)
                            att_inp = tf.concat([a, h], -1,
                                                name='ai_h')  # (B, L, D+n)
                            assert K.int_shape(att_inp) == (B, L, D + n)

                        if CONF.att_model == 'MLP_shared':
                            ## For #layers > 1 this implementation will end up being different from the paper's implementation.
                            ## Below is how it is implemented in the code released by the authors of the paper:
                            ##     for i in range(1, CONF.att_a_layers+1):
                            ##         if not last_layer:
                            ##              a = Dense(CONF['att_a_%d_n'%(i,)], activation=tanh)(a)
                            ##         else: # last-layer
                            ##              a = AffineTransform(CONF['att_a_%d_n'%(i,)])(a)
                            ##     h = AffineTransform(CONF['att_h_%d_n'%(i,)])(h)
                            ##     ah = a + K.expand_dims(h, axis=1)
                            ##     ah = tanh(ah)
                            ##     alpha = Dense(softmax_layer_params, activation=softmax)(ah)

                            alpha_1_ = tfc.MLPStack(CONF.att_layers)(att_inp)  # (B, L, 1)
                            assert K.int_shape(alpha_1_) == (B, L, 1)
                            alpha_ = K.squeeze(alpha_1_, axis=2)  # output shape = (B, L)
                            assert K.int_shape(alpha_) == (B, L)

                        elif CONF.att_model == '1x1_conv':
                            """
                            NOTE: The above model ('MLP_shared') tantamounts to a 
                            1x1 convolution on the Lx1 shaped (L=H.W) convnet features with num_channels=D i.e. an input shape of (H,W,C) or (1,L,D).
                            Using 'dimctx' kernels of size (1,1) and stride=1 resulting in an output shape of (1,L,dimctx) [or (B, L, 1, dimctx) with the batch dimension included].
                            This option provides such a convnet layer implementation (which turns out not to be faster than MLP_shared).
                            """
                            att_inp = tf.expand_dims(att_inp, axis=1)
                            alpha_1_ = tfc.ConvStack(
                                CONF.att_layers,
                                (B, 1, L, D + self.output_size))(att_inp)
                            assert K.int_shape(alpha_1_) == (B, 1, L, 1)
                            alpha_ = tf.squeeze(alpha_1_, axis=[1, 3])  # (B, L)
                            assert K.int_shape(alpha_) == (B, L)

                    elif CONF.att_model == 'MLP_full':  # MLP: weights not shared across L
                        ## Concatenate a and h_prev and pass them through an MLP. This is different from the Theano
                        ## implementation of the paper because we flatten a from (B,L,D) to (B,L*D). Hence each element
                        ## of the L*D vector receives its own weight, because the effective weight matrix here has
                        ## shape (L*D, num_dense_units) as compared to (D, num_dense_units) in the shared-weights case.

                        ## Concatenate a and h. Final shape will be (B, L*D+n)
                        with tf.variable_scope('a_h'):
                            a_ = K.batch_flatten(a)  # (B, L*D)
                            a_.set_shape(
                                (B, L * D))  # Flatten loses shape info
                            if CONF.build_scanning_RNN and CONF.feed_clock_to_att:
                                assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                                att_inp = tf.concat(
                                    [a_, h, Ex_t], -1,
                                    name="a_h_x")  # (B, L*D + n + m)
                                assert K.int_shape(att_inp) == (
                                    B, L * D + self.output_size +
                                    m), 'shape %s != %s' % (
                                        K.int_shape(att_inp),
                                        (B, L * D + self.output_size + m))
                            else:
                                att_inp = tf.concat([a_, h], -1,
                                                    name="a_h")  # (B, L*D + n)
                                assert K.int_shape(att_inp) == (
                                    B, L * D +
                                    self.output_size), 'shape %s != %s' % (
                                        K.int_shape(att_inp),
                                        (B, L * D + self.output_size))
                        alpha_ = tfc.MLPStack(CONF.att_layers)(
                            att_inp)  # (B, L)
                        assert K.int_shape(alpha_) == (B, L)

                    else:
                        raise AttributeError(
                            'Invalid value of att_model param: %s' %
                            CONF.att_model)

                    ## Softmax
                    alpha = tf.identity(tf.nn.softmax(alpha_), name='alpha')
                    assert K.int_shape(alpha) == (B, L)

                    ## Attention Modulator: Beta
                    if CONF.build_att_modulator:
                        beta = tfc.MLPStack(CONF.att_modulator,
                                            self.batch_output_shape)(h_prev)
                        beta = tf.identity(beta, name='beta')
                    else:
                        beta = tf.constant(1., shape=(B, 1), dtype=CONF.dtype)
                    assert K.int_shape(beta) == (B, 1)

                    return alpha, beta
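A minimal NumPy sketch of the equivalence claimed in the '1x1_conv' docstring above: applying one shared dense layer at each of the L locations ('MLP_shared') computes exactly what a 1x1 convolution with stride 1 computes over a (1, L, D) feature map (toy shapes and a tanh activation assumed here):

import numpy as np

B, L, D, H = 2, 6, 5, 3                    # toy batch, locations, depth, hidden units
rng = np.random.default_rng(0)
att_inp = rng.standard_normal((B, L, D))   # stand-in for the concatenated [a, h] features
W = rng.standard_normal((D, H))
b = rng.standard_normal(H)

# 'MLP_shared': the same dense layer applied independently at every location
mlp_out = np.tanh(att_inp @ W + b)         # (B, L, H)

# '1x1_conv': slide an H-filter kernel of spatial size 1 over the (1, L) grid; at each
# location the kernel reduces to the same per-position matrix product
conv_out = np.empty((B, L, H))
for l in range(L):
    conv_out[:, l, :] = np.tanh(att_inp[:, l, :] @ W + b)

print(np.allclose(mlp_out, conv_out))      # True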