Esempio n. 1
0
def atari_qnet(input_shape, num_actions, net_name, net_size):
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # convolutional layers
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    # if recurrent net then change input shape
    if 'drqn' in net_name:
        # recurrent net (drqn)
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)

        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
    elif 'dqn' in net_name:
        # fully connected net (dqn)
        # extract features with convolutional layers
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)

    # network type. Dense for dqn; LSTM or GRU for drqn
    if 'lstm' in net_name:
        net_type = LSTM
    elif 'gru' in net_name:
        net_type = GRU
    else:
        net_type = Dense

    # dueling or regular dqn/drqn
    if 'dueling' in net_name:
        value1 = net_type(net_size, activation='relu')(feature)
        adv1 = net_type(net_size, activation='relu')(feature)
        value2 = Dense(1)(value1)
        adv2 = Dense(num_actions)(adv1)
        mean_adv2 = Lambda(lambda x: K.mean(x, axis=1))(adv2)
        ones = K.ones([1, num_actions])
        lambda_exp = lambda x: K.dot(K.expand_dims(x, axis=1), -ones)
        exp_mean_adv2 = Lambda(lambda_exp)(mean_adv2)
        sum_adv = add([exp_mean_adv2, adv2])
        exp_value2 = Lambda(lambda x: K.dot(x, ones))(value2)
        q_value = add([exp_value2, sum_adv])
    else:
        hid = net_type(net_size, activation='relu')(feature)
        q_value = Dense(num_actions)(hid)

    # build model
    return Model(inputs=state, outputs=q_value)
Esempio n. 2
0
def optimized_trilinear_for_attention(args,
                                      c_maxlen,
                                      q_maxlen,
                                      input_keep_prob=1.0,
                                      scope='efficient_trilinear',
                                      bias_initializer=tf.zeros_initializer(),
                                      kernel_initializer=initializer()):
    assert len(args) == 2, "just use for computing attention with two input"
    arg0_shape = args[0].get_shape().as_list()
    arg1_shape = args[1].get_shape().as_list()
    if len(arg0_shape) != 3 or len(arg1_shape) != 3:
        raise ValueError("`args` must be 3 dims (batch_size, len, dimension)")
    if arg0_shape[2] != arg1_shape[2]:
        raise ValueError("the last dimension of `args` must equal")
    arg_size = arg0_shape[2]
    dtype = args[0].dtype
    droped_args = [tf.nn.dropout(arg, input_keep_prob) for arg in args]
    with tf.variable_scope(scope):
        weights4arg0 = tf.get_variable("linear_kernel4arg0", [arg_size, 1],
                                       dtype=dtype,
                                       regularizer=regularizer,
                                       initializer=kernel_initializer)
        weights4arg1 = tf.get_variable("linear_kernel4arg1", [arg_size, 1],
                                       dtype=dtype,
                                       regularizer=regularizer,
                                       initializer=kernel_initializer)
        weights4mlu = tf.get_variable("linear_kernel4mul", [1, 1, arg_size],
                                      dtype=dtype,
                                      regularizer=regularizer,
                                      initializer=kernel_initializer)
        biases = tf.get_variable("linear_bias", [1],
                                 dtype=dtype,
                                 regularizer=regularizer,
                                 initializer=bias_initializer)
        subres0 = tf.tile(backend.dot(droped_args[0], weights4arg0),
                          [1, 1, q_maxlen])
        subres1 = tf.tile(
            tf.transpose(backend.dot(droped_args[1], weights4arg1),
                         perm=(0, 2, 1)), [1, c_maxlen, 1])
        subres2 = backend.batch_dot(
            droped_args[0] * weights4mlu,
            tf.transpose(droped_args[1], perm=(0, 2, 1)))
        res = subres0 + subres1 + subres2
        res += biases
        return res
Esempio n. 3
0
    def get_initial_states(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(
            x)  # (batch_samples, timesteps) + image_shape
        non_channel_axis = -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state,
                                       axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state,
                                   axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            # pass prediction in states so can use as actual for t+1 when extrapolating
            states_to_pass.append('ahat')
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:  # ['r', 'c', 'e'] is the state
            for l in range(
                    nlayers_to_pass[u]):  # initialize all the state with zero
                ds_factor = 2**l  # why downsampling?
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = nb_row * nb_col * stack_size  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis],
                                   output_size))  # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state,
                                      reducer)  # (samples, output_size)
                output_shp = [-1, nb_row, nb_col, stack_size]
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if self.extrap_start_time is not None:
            initial_states += [
                K.variable(0, 'int32')
            ]  # the last state will correspond to the current timestep
        return initial_states
Esempio n. 4
0
def gram_matrix(x):
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
def gram_matrix(x):
    features = backend.batch_flatten(backend.permute_dimensions(x, (2, 0, 1)))
    gram = backend.dot(features, backend.transpose(features))
    return gram