def reward_prediction_mid(input_images):
    """A reward predictor network built from intermediate layers.

    The inputs can be of any image size (usually the intermediate conv outputs).
    The model runs 3 conv layers on top of each input, with a dense layer at
    the end. All of these are combined with 2 additional dense layers.

    Args:
      input_images: the input images. Size is arbitrary.

    Returns:
      the predicted reward.
    """
    encoded = []
    for i, x in enumerate(input_images):
        enc = x
        enc = tfl.conv2d(enc,
                         16, [3, 3],
                         strides=(1, 1),
                         activation=tf.nn.relu)
        enc = tfl.conv2d(enc, 8, [3, 3], strides=(2, 2), activation=tf.nn.relu)
        enc = tfl.conv2d(enc, 4, [3, 3], strides=(2, 2), activation=tf.nn.relu)
        enc = tfl.flatten(enc)
        enc = tfl.dense(enc, 8, activation=tf.nn.relu, name="rew_enc_%d" % i)
        encoded.append(enc)
    x = encoded
    x = tf.stack(x, axis=1)
    x = tfl.flatten(x)
    x = tfl.dense(x, 32, activation=tf.nn.relu, name="rew_dense1")
    x = tfl.dense(x, 16, activation=tf.nn.relu, name="rew_dense2")
    return x
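A minimal usage sketch for the function above; the batch size, spatial size, and channel counts are illustrative assumptions, and a TF1-style graph with the same `tfl` alias as the snippets is assumed.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tfl = tf.layers  # same alias the examples use

# Three hypothetical intermediate conv outputs, e.g. from a video model.
feature_maps = [tf.zeros([4, 16, 16, 32]) for _ in range(3)]
reward_features = reward_prediction_mid(feature_maps)
print(reward_features.shape)  # (4, 16): one 16-dim feature vector per batch element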
Example #2
    def make_model(self):
        # Reset the default graph and disable eager execution (done here, inside make_model())
        ops.reset_default_graph()
        tf.compat.v1.disable_eager_execution()

        # Initializing TensorFlow session
        self.sess = Session(config=ConfigProto(allow_soft_placement=True))

        if self.build_model[0][
                'type'] == ModelBuilder.LAYER_INPUT and self.build_model[-1][
                    'type'] == ModelBuilder.LAYER_OUTPUT:
            self.build_model[0]['shape'] = [None, self.state_size]
            self.build_model[-1]['length'] = self.action_size

        #Load each layer
        self.model_layers = []
        for layer_model in self.build_model:
            if layer_model['type'] == ModelBuilder.LAYER_INPUT:
                if self.build_model.index(layer_model) == 0:
                    self.model_layers.append(
                        placeholder(dtype=tf.float32,
                                    shape=layer_model['shape'],
                                    name='inputs_'))
                else:
                    raise IncoherentBuildModelError(
                        "Input Layer must be the first one.")
            elif layer_model['type'] == ModelBuilder.LAYER_FULLY_CONNECTED:
                self.model_layers.append(
                    layers.dense(inputs=self.model_layers[-1],
                                 units=layer_model['nodes'],
                                 activation=tf.nn.relu,
                                 name=layer_model['name']))
            elif layer_model['type'] == ModelBuilder.LAYER_OUTPUT:
                self.model_layers.append(
                    layers.dense(inputs=self.model_layers[-1],
                                 units=self.action_size,
                                 activation=None))
            else:
                raise UnsupportedBuildModelLayerTypeError(
                    "Unsuported Layer Type " + layer_model['type'])

        #Set up the target Q(s,a) placeholder, loss and optimizer
        self.tf_qsa = placeholder(shape=[None, self.action_size],
                                  dtype=tf.float32)
        self.loss = tf.losses.mean_squared_error(self.tf_qsa,
                                                 self.model_layers[-1])
        self.optimizer = train.AdamOptimizer(self.learning_rate).minimize(
            self.loss)

        #self.logits = layers.dense(self.model_layers[-1], self.action_size)
        #self._states = placeholder(shape=[None, self.state_size], dtype=tf.float32)

        self.sess.run(global_variables_initializer())

        self.saver = train.Saver()
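For context, here is a sketch of the kind of build_model description this method iterates over. The dict keys ('type', 'shape', 'nodes', 'name', 'length') and the ModelBuilder constants come from the code above; the concrete values are assumptions about the surrounding project.

build_model = [
    {'type': ModelBuilder.LAYER_INPUT, 'shape': [None, 4]},             # shape is overwritten with [None, state_size]
    {'type': ModelBuilder.LAYER_FULLY_CONNECTED, 'nodes': 64, 'name': 'fc1'},
    {'type': ModelBuilder.LAYER_FULLY_CONNECTED, 'nodes': 64, 'name': 'fc2'},
    {'type': ModelBuilder.LAYER_OUTPUT, 'length': 2},                   # length is overwritten with action_size
]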
Example #3
 def __call__(self, x, reuse=True):
     with tf.variable_scope(self.name) as vs:
         if reuse:
             vs.reuse_variables()
         fc = x
         fc = tf.reshape(fc, shape=[-1, 56, 56, 3])
         fc = layers.conv2d(fc, filters=self.nfilt, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='h1')
         #fc = bn(fc, 'eb1')
         fc = tf.nn.leaky_relu(fc)
         fc = layers.conv2d(fc, filters=self.nfilt*2, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='h2')
         #fc = bn(fc, 'eb2')
         fc = tf.nn.leaky_relu(fc)
         fc = layers.conv2d(fc, filters=self.nfilt*4, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='h3')
         #fc = bn(fc, 'eb3')
         fc = tf.nn.leaky_relu(fc)
         fc = layers.flatten(fc)
         fc = layers.dense(
             fc, self.num_at-1,
             activation=self.act_at,
             kernel_initializer=tf.keras.initializers.glorot_normal()
         )
         return fc
Example #4
 def __call__(self, z, reuse=True):
     with tf.variable_scope(self.name) as vs:
         if reuse:
             vs.reuse_variables()
         fc = z
         fc = tf.keras.layers.GaussianNoise(self.noise_z_std)(fc)
         fc = layers.dense(
             fc, 7*7*self.nfilt*4,
             activation=self.act_at,
             kernel_initializer=tf.keras.initializers.glorot_normal()
         )
         fc = tf.reshape(fc, [-1, 7, 7, self.nfilt*4])
         fc = tf.layers.conv2d_transpose(fc, filters=self.nfilt*2, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='d1')
         #fc = bn(fc, 'db1')
         fc = tf.nn.leaky_relu(fc)
         fc = tf.layers.conv2d_transpose(fc, filters=self.nfilt, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='d2')
         #fc = bn(fc, 'db2')
         fc = tf.nn.leaky_relu(fc)
         fc = tf.layers.conv2d_transpose(fc, filters=3, kernel_initializer=tf.keras.initializers.glorot_normal(), kernel_size=self.k,
             padding='same', strides=[self.s,self.s], activation=None, name='recon')
         #fc = bn(fc, 'db3')
         if self.act_out is not None:
             fc = self.act_out(fc)
         fc = tf.reshape(fc, shape=[-1, self.x_dim])
         return fc
Example #5
def decode_to_shape(inputs, shape, scope):
    """Encode the given tensor to given image shape."""
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        x = inputs
        x = tfl.flatten(x)
        x = tfl.dense(x, shape[2], activation=None, name="dec_dense")
        x = tf.expand_dims(x, axis=1)
        return x
Example #6
def encode_to_shape(inputs, shape, scope):
    """Encode the given tensor to given image shape."""
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        w, h = shape[1], shape[2]
        x = inputs
        x = tfl.flatten(x)
        x = tfl.dense(x, w * h, activation=None, name="enc_dense")
        x = tf.reshape(x, (-1, w, h, 1))
        return x
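A shape-level sketch of the two helpers above, e.g. for routing a reward vector through image-shaped layers. The tensor sizes are illustrative, and the same tfl = tf.layers alias is assumed.

reward = tf.zeros([8, 1, 3])          # hypothetical per-step reward vector
frame_shape = [8, 64, 64, 3]          # batch, height, width, channels

as_image = encode_to_shape(reward, frame_shape, "reward_enc")    # -> (8, 64, 64, 1)
as_vector = decode_to_shape(as_image, [8, 1, 3], "reward_dec")   # -> (8, 1, 3)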
Example #7
def cartpole_model(x_input, num_actions, scope, reuse=False):
    """For CartPole we'll use a smaller network.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # --------
        # Migrated to tf 1.14
        # --------
        # out = x_input
        # out = layers.fully_connected(out, num_outputs=32,
        #         activation_fn=tf.nn.tanh)
        # out = layers.fully_connected(out, num_outputs=32,
        #         activation_fn=tf.nn.tanh)
        # out = layers.fully_connected(out, num_outputs=num_actions,
        #         activation_fn=None)
        out = x_input
        out = layers.dense(out, units=32, activation=tf.nn.tanh)
        out = layers.dense(out, units=32, activation=tf.nn.tanh)
        out = layers.dense(out, units=num_actions, activation=None)
        return out
Example #8
def graph_conv(_X, _A, O):
    """
    Equation of graph convolution.
    _X: vector X. Nodes.
    _A: adjacency matrix. Edges or path.
    """
    out = dense(_X, units=O, use_bias=True)
    out = tf.matmul(_A, out)
    out = tf.nn.relu(out)

    return out
Example #9
def cdna_transformation(prev_image, cdna_input, num_masks, color_channels,
                        dna_kernel_size, relu_shift):
    """Apply convolutional dynamic neural advection to previous image.

  Args:
    prev_image: previous image to be transformed.
    cdna_input: hidden layer to be used for computing CDNA kernels.
    num_masks: number of masks and hence the number of CDNA transformations.
    color_channels: the number of color channels in the images.
    dna_kernel_size: dna kernel size.
    relu_shift: shift for ReLU function.

  Returns:
    List of images transformed by the predicted CDNA kernels.
  """
    batch_size = tf.shape(cdna_input)[0]
    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])

    # Predict kernels using linear function of last hidden layer.
    cdna_kerns = tfl.dense(cdna_input,
                           dna_kernel_size * dna_kernel_size * num_masks,
                           name="cdna_params",
                           activation=None)

    # Reshape and normalize.
    cdna_kerns = tf.reshape(
        cdna_kerns,
        [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    cdna_kerns = (tf.nn.relu(cdna_kerns - relu_shift) + relu_shift)
    norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keep_dims=True)
    cdna_kerns /= norm_factor

    # Treat the color channel dimension as the batch dimension since the same
    # transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # depthwise_conv2d can apply a different transformation to each sample.
    cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3])
    cdna_kerns = tf.reshape(
        cdna_kerns, [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions.
    prev_image = tf.transpose(prev_image, [3, 1, 2, 0])

    # Transform image.
    transformed = tf.nn.depthwise_conv2d(prev_image, cdna_kerns, [1, 1, 1, 1],
                                         "SAME")

    # Transpose the dimensions to where they belong.
    transformed = tf.reshape(
        transformed, [color_channels, height, width, batch_size, num_masks])
    transformed = tf.transpose(transformed, [3, 1, 2, 0, 4])
    transformed = tf.unstack(transformed, axis=-1)
    return transformed
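A hedged shape check for cdna_transformation; the image size, kernel size, and mask count below are illustrative, not values from the original model.

prev_image = tf.zeros([8, 64, 64, 3])     # hypothetical previous frame batch
cdna_input = tf.zeros([8, 128])           # hypothetical hidden layer
transformed = cdna_transformation(prev_image, cdna_input,
                                  num_masks=10, color_channels=3,
                                  dna_kernel_size=5, relu_shift=1e-12)
# transformed is a list of num_masks tensors, one per mask, each batch x 64 x 64 x 3.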
Example #10
def reward_prediction_video_conv(frames, rewards, prediction_len):
    """A reward predictor network from observed/predicted images.

     The input is a list of frames.

  Args:
    frames: the list of input images.
    rewards: previously observed rewards.
    prediction_len: the length of the reward vector.

  Returns:
    the predicted rewards.
  """
    x = tf.concat(frames, axis=-1)
    x = tfl.conv2d(x, 32, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 32, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 16, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.conv2d(x, 8, [3, 3], strides=(2, 2), activation=tf.nn.relu)
    x = tfl.flatten(x)

    y = tf.concat(rewards, axis=-1)
    y = tfl.dense(y, 32, activation=tf.nn.relu)
    y = tfl.dense(y, 16, activation=tf.nn.relu)
    y = tfl.dense(y, 8, activation=tf.nn.relu)

    z = tf.concat([x, y], axis=-1)
    z = tfl.dense(z, 32, activation=tf.nn.relu)
    z = tfl.dense(z, 16, activation=tf.nn.relu)
    z = tfl.dense(z, prediction_len, activation=None)
    z = tf.expand_dims(z, axis=-1)
    return z
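An illustrative call for the predictor above; the frame count, batch size, and image size are assumptions.

frames = [tf.zeros([16, 64, 64, 3]) for _ in range(4)]   # observed/predicted frames
rewards = [tf.zeros([16, 1]) for _ in range(4)]          # previously observed rewards
pred = reward_prediction_video_conv(frames, rewards, prediction_len=8)
print(pred.shape)  # (16, 8, 1)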
Example #11
def atari_model(img_in, num_actions, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # out = layers.convolution2d(out, num_outputs=32,
            #         kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            # out = layers.convolution2d(out, num_outputs=64,
            #         kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            # out = layers.convolution2d(out, num_outputs=64,
            #         kernel_size=3, stride=1, activation_fn=tf.nn.relu)
            # out = layers.flatten(out)
            print(tf.shape(out))
            out = layers.conv2d(out,
                                filters=32,
                                kernel_size=8,
                                strides=(4, 4),
                                activation=tf.nn.relu)
            print(tf.shape(out))
            out = layers.conv2d(out,
                                filters=64,
                                kernel_size=4,
                                strides=(2, 2),
                                activation=tf.nn.relu)
            print(tf.shape(out))
            out = layers.conv2d(out,
                                filters=64,
                                kernel_size=3,
                                strides=(1, 1),
                                activation=tf.nn.relu)
            print(tf.shape(out))
            out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            # out = layers.fully_connected(out, num_outputs=512,
            #         activation_fn=tf.nn.relu)
            # out = layers.fully_connected(out, num_outputs=num_actions,
            #         activation_fn=None)
            print(tf.shape(out))
            out = layers.dense(out, units=512, activation=tf.nn.relu)
            out = layers.dense(out, units=num_actions, activation=None)
        return out
Example #12
def mlp_model(input,
              num_outputs,
              scope,
              reuse=False,
              num_units=64,
              rnn_cell=None):
    # This model takes as input an observation and returns values of all actions
    with tf.compat.v1.variable_scope(scope, reuse=reuse):

        out = input
        out = layers.dense(out,
                           units=num_units,
                           activation=tf.compat.v1.nn.relu)
        out = layers.dense(out,
                           units=num_units,
                           activation=tf.compat.v1.nn.relu)
        out = layers.dense(out, units=num_outputs, activation=None)
        """
        out = layers.Dense(num_units, activation=tf.nn.relu)(input)
        out = layers.Dense(num_units, activation=tf.nn.relu)(out)
        out = layers.Dense(num_outputs, activation=None)(out)
        """
        return out
Example #13
def reward_prediction_basic(prediction):
    """The most simple reward predictor.

     This works by averaging the pixels and running a dense layer on top.

  Args:
    prediction: The predicted image.

  Returns:
    the predicted reward.
  """
    x = prediction
    x = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    x = tf.squeeze(x, axis=[1, 2])
    x = tfl.dense(x, 128, activation=tf.nn.relu, name="reward_pred")
    return x
Example #14
 def __call__(self, x, reuse=True):
     with tf.variable_scope(self.name) as vs:
         if reuse:
             vs.reuse_variables()
         fc = x
         for idx,out_dim in enumerate(self.layers_dim):
             if idx == len(self.layers_dim)-1:
                 act_fun = self.act_at
             else:
                 act_fun = tf.nn.leaky_relu
             fc = layers.dense(
                 fc, out_dim,
                 activation=act_fun,
                 kernel_initializer=tf.keras.initializers.glorot_normal()
             )
         return fc
Example #15
    def get_q_values_op(self, state, scope, reuse=False):
        """
        Returns Q values for all actions

        Args:
            state: (tf tensor)
                shape = (batch_size, img height, img width, nchannels x config.state_history)
            scope: (string) scope name, that specifies if target network or not
            reuse: (bool) reuse of variables in the scope

        Returns:
            out: (tf tensor) of shape = (batch_size, num_actions)
        """
        # this information might be useful
        num_actions = self.env.action_space.n

        ##############################################################
        """
        TODO:
            Implement a fully connected with no hidden layer (linear
            approximation with bias) using tensorflow.

        HINT:
            - You may find the following functions useful:
                - tf.layers.flatten
                - tf.layers.dense

            - Make sure to also specify the scope and reuse
        """
        ##############################################################
        ################ YOUR CODE HERE - 2-3 lines ##################

        out = layers.flatten(state)
        out = layers.dense(out, units=num_actions, name=scope, reuse=reuse)
        ##############################################################
        ######################## END YOUR CODE #######################

        return out
Example #16
 def __call__(self, z, reuse=True, c=None):
     """Defines the decode meta variables"""
     with tf.variable_scope(self.name) as vs:
         if reuse:
             vs.reuse_variables()
         fc = z
         fc = tf.keras.layers.GaussianNoise(self.noise_z_std)(fc)
         if c is not None:
             """Optionally concat noise(z) to z """
             fc = tf.concat([fc, z], axis=1)
         for idx,out_dim in enumerate(self.layers_dim):
             """Define activations"""
             if idx == len(self.layers_dim)-1:
                 act_fun = self.act_out
             elif idx == 0:
                 act_fun = self.act_at
             else:
                 act_fun = tf.nn.leaky_relu
             fc = layers.dense(
                 fc, out_dim,
                 activation=act_fun,
                 kernel_initializer=tf.keras.initializers.glorot_normal()
             )
         return fc
Example #17
    def __init__(self, state_size, action_size, learning_rate, name='DQLearner'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with v1.variable_scope(name):
            # We create the placeholders
            # *state_size unpacks each element of the state_size tuple, so this
            # is equivalent to writing [None, 84, 84, 4]
            self.inputs_ = v1.placeholder(tf.float32, [None, *state_size], name="inputs")
            self.actions_ = v1.placeholder(tf.float32, [None, 3], name="actions_")

            # Remember that target_Q is R(s,a) + gamma * max_a' Q_hat(s', a')
            self.target_Q = v1.placeholder(tf.float32, [None], name="target")

            """
            First convnet:
            CNN
            BatchNormalization
            ELU
            """
            # Input is 84x84x4
            self.conv1 = v1l.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8, 8],
                                          strides=[4, 4],
                                          padding="VALID",
                                          kernel_initializer=v1.initializers.glorot_uniform(),
                                          name="conv1")

            self.conv1_batchnorm = v1l.batch_normalization(self.conv1,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm1')

            self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")
            ## --> [20, 20, 32]

            """
            Second convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv2 = v1l.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=v1.initializers.glorot_uniform(),
                                          name="conv2")

            self.conv2_batchnorm = v1l.batch_normalization(self.conv2,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm2')

            self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")
            ## --> [9, 9, 64]

            """
            Third convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv3 = v1l.conv2d(inputs=self.conv2_out,
                                  filters=128,
                                  kernel_size=[4, 4],
                                  strides=[2, 2],
                                  padding="VALID",
                                  kernel_initializer=v1.initializers.glorot_uniform(),
                                  name="conv3")

            self.conv3_batchnorm = v1l.batch_normalization(self.conv3,
                                                         training=True,
                                                         epsilon=1e-5,
                                                         name='batch_norm3')

            self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")
            ## --> [3, 3, 128]

            self.flatten = v1l.flatten(self.conv3_out)
            ## --> [1152]

            self.fc = v1l.dense(inputs=self.flatten,
                              units=512,
                              activation=tf.nn.elu,
                              kernel_initializer=v1.initializers.glorot_uniform(),
                              name="fc1")

            self.output = v1l.dense(inputs=self.fc,
                                  kernel_initializer=v1.initializers.glorot_uniform(),
                                  units=3,
                                  activation=None)

            # Q is our predicted Q value.
            self.Q = tf.math.reduce_sum(tf.math.multiply(self.output, self.actions_), axis=1)

            # The loss is the mean squared difference between the target Q and
            # our predicted Q: mean((Q_target - Q)^2)
            self.loss = tf.math.reduce_mean(tf.math.square(self.target_Q - self.Q))

            self.optimizer = v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
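A hedged sketch of one optimization step against this graph. The session setup, batch contents, and hyperparameters are assumptions (v1 is tf.compat.v1, as in the class above), and the targets would normally come from R(s,a) + gamma * max_a' Q_hat(s', a').

import numpy as np

net = DQLearner(state_size=(84, 84, 4), action_size=3, learning_rate=2.5e-4)

sess = v1.Session()
sess.run(v1.global_variables_initializer())

# Dummy batch: stacked frames, one-hot actions, and precomputed TD targets.
states = np.zeros((32, 84, 84, 4), dtype=np.float32)
actions = np.eye(3, dtype=np.float32)[np.random.randint(0, 3, size=32)]
targets = np.zeros(32, dtype=np.float32)

loss, _ = sess.run([net.loss, net.optimizer],
                   feed_dict={net.inputs_: states,
                              net.actions_: actions,
                              net.target_Q: targets})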
Example #18
def mlp(x, hidden_sizes, activation=tf.nn.relu, output_activation=None):

    for units in hidden_sizes[:-1]:
        x = dense(x, units, activation=activation)

    return dense(x, hidden_sizes[-1], activation=output_activation)
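A short usage sketch; dense is assumed to be tf.layers.dense as in the other examples, and the observation and action sizes are illustrative.

obs = tf.placeholder(tf.float32, shape=[None, 4])   # e.g. CartPole observations
q_values = mlp(obs, hidden_sizes=[64, 64, 2])       # two 64-unit hidden layers, linear 2-unit output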
Example #19
    def __init__(self,
                 myScope,
                 h_size,
                 agent,
                 env,
                 trace_length,
                 batch_size,
                 reuse=None,
                 step=False):
        if step:
            trace_length = 1
        with tf.variable_scope(myScope, reuse=reuse):
            self.batch_size = batch_size
            zero_state = tf.zeros((batch_size, h_size * 2), dtype=tf.float32)
            self.gamma_array = tf.placeholder(shape=[1, trace_length],
                                              dtype=tf.float32,
                                              name='gamma_array')
            self.gamma_array_inverse = tf.placeholder(shape=[1, trace_length],
                                                      dtype=tf.float32,
                                                      name='gamma_array_inv')

            self.lstm_state = tf.placeholder(shape=[batch_size, h_size * 2],
                                             dtype=tf.float32,
                                             name='lstm_state')

            if step:
                self.state_input = tf.placeholder(shape=[self.batch_size] +
                                                  env.ob_space_shape,
                                                  dtype=tf.float32,
                                                  name='state_input')
                lstm_state = self.lstm_state
            else:
                self.state_input = tf.placeholder(
                    shape=[batch_size * trace_length] + env.ob_space_shape,
                    dtype=tf.float32,
                    name='state_input')
                lstm_state = zero_state

            self.sample_return = tf.placeholder(shape=[None, trace_length],
                                                dtype=tf.float32,
                                                name='sample_return')
            self.sample_reward = tf.placeholder(shape=[None, trace_length],
                                                dtype=tf.float32,
                                                name='sample_reward')
            with tf.variable_scope('input_proc', reuse=reuse):
                output = layers.conv2d(self.state_input,
                                       kernel_size=(3, 3),
                                       filters=20,
                                       activation=tf.nn.relu,
                                       padding='same')
                output = layers.conv2d(output,
                                       kernel_size=(3, 3),
                                       filters=20,
                                       activation=tf.nn.relu,
                                       padding='same')
                output = layers.flatten(output)
                print('values', output.get_shape())
                self.value = tf.reshape(layers.dense(tf.nn.relu(output), 1),
                                        [-1, trace_length])
            if step:
                output_seq = batch_to_seq(output, self.batch_size, 1)
            else:
                output_seq = batch_to_seq(output, self.batch_size,
                                          trace_length)
            output_seq, state_output = lstm(output_seq,
                                            lstm_state,
                                            scope='rnn',
                                            nh=h_size)
            output = seq_to_batch(output_seq)

            output = layers.dense(output,
                                  units=env.NUM_ACTIONS,
                                  activation=None)
            self.log_pi = tf.nn.log_softmax(output)
            self.lstm_state_output = state_output

            self.actions = tf.placeholder(shape=[None],
                                          dtype=tf.int32,
                                          name='actions')
            self.actions_onehot = tf.one_hot(self.actions,
                                             env.NUM_ACTIONS,
                                             dtype=tf.float32)

            predict = tf.multinomial(self.log_pi, 1)
            self.predict = tf.squeeze(predict)

            self.next_value = tf.placeholder(shape=[None, 1],
                                             dtype=tf.float32,
                                             name='next_value')
            self.next_v = tf.matmul(self.next_value, self.gamma_array_inverse)
            self.target = self.sample_return + self.next_v

            self.td_error = tf.square(self.target - self.value) / 2
            self.loss = tf.reduce_mean(self.td_error)

        self.parameters = []
        self.value_params = []
        for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=myScope):
            if not ('value_params' in i.name):
                self.parameters.append(i)  # i.name if you want just a name
            if 'input_proc' in i.name:
                self.value_params.append(i)

        if not step:
            self.log_pi_action = tf.reduce_mean(tf.multiply(
                self.log_pi, self.actions_onehot),
                                                reduction_indices=1)
            self.log_pi_action_bs = tf.reduce_sum(
                tf.reshape(self.log_pi_action, [-1, trace_length]), 1)
            self.log_pi_action_bs_t = tf.reshape(
                self.log_pi_action, [self.batch_size, trace_length])
            self.trainer = tf.train.GradientDescentOptimizer(learning_rate=1)
            self.updateModel = self.trainer.minimize(
                self.loss, var_list=self.value_params)

        self.setparams = SetFromFlat(self.parameters)
        self.getparams = GetFlat(self.parameters)
        self.param_len = len(self.parameters)

        for var in self.parameters:
            print(var.name, var.get_shape())
Example #20
Y_truth = placeholder(tf.float64, shape=(None, n_labels))

# Function implementing H^(l+1) = sigma(A (H^(l) W^(l) + b^(l))),
# with the bias term supplied by the tf dense layer.
def graph_conv(_X, _A, O):
    """
    Equation of graph convolution.
    _X: vector X. Nodes.
    _A: adjacency matrix. Edges or path.
    """
    out = dense(_X, units=O, use_bias=True)
    out = tf.matmul(_A, out)
    out = tf.nn.relu(out)

    return out

X_new = graph_conv(X, A, 32)
print(X_new)

gconv1 = graph_conv(X, A, 32)
gconv2 = graph_conv(gconv1, A, 32)
gconv3 = graph_conv(gconv2, A, 32)

Y_pred = tf.nn.softmax(dense(gconv3, units=n_labels, use_bias=True), axis=2)
print(Y_pred)

Y_pred = tf.reshape(Y_pred, [-1])
loss = tf.reduce_mean(Y_truth * tf.math.log(Y_pred + 1e-5))

print(loss)