Example #1
    def __init__(
        self,
        pre_graph_builder,
        node_dim,
        conv_type='GSage',
        num_conv_layers=3,
        hidden_activation=None,
        output_activation=None,
    ):
        super(GNNNet, self).__init__()
        print('gnn')
        # graph builder
        self.pre_graph_builder = pre_graph_builder

        # convs
        self.node_input_dim = pre_graph_builder.output_dim
        self.node_dim = node_dim
        self.conv_type = conv_type
        self.num_conv_layers = num_conv_layers
        self.convs = self.build_convs(self.node_input_dim, self.node_dim,
                                      self.num_conv_layers)
        # one activation module per hidden conv layer
        self.hidden_activations = nn.ModuleList([
            get_activation(hidden_activation) for _ in range(num_conv_layers)
        ])
        self.output_activation = get_activation(output_activation)
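All of these examples call a project-specific get_activation helper that maps an activation name (or None) to a callable; the helper itself is not shown on this page. Below is a minimal sketch of the assumed contract for the PyTorch examples (the name-to-module mapping is an assumption, and real projects differ; the TensorFlow examples further down would use an analogous helper returning ops such as tf.nn.relu):

import torch.nn as nn

def get_activation(activation):
    # Hypothetical sketch: map a name (or None) to an activation module.
    if activation is None:
        return nn.Identity()
    activations = {
        'relu': nn.ReLU(),
        'tanh': nn.Tanh(),
        'sigmoid': nn.Sigmoid(),
        'softplus': nn.Softplus(),
    }
    return activations[activation]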
Example #2
    def _create_network(self, current_state_inputs, goal_inputs):
        reuse = self._reuse
        name_prefix = self.name_prefix
        variable_count = len(tf.compat.v1.trainable_variables())
        activation = get_activation(
            self.config['value_estimator']['activation'])
        network_layers = self.config['value_estimator']['layers']

        # concatenate the current state and the goal into a single input vector
        current_input = tf.concat((current_state_inputs, goal_inputs), axis=1)

        current = current_input
        for i, layer_size in enumerate(network_layers):
            current = tf.compat.v1.layers.dense(
                current,
                layer_size,
                activation=activation,
                name='{}_layer_{}'.format(name_prefix, i),
                reuse=reuse,
            )

        # final linear layer producing a scalar value estimate
        value_estimation = tf.compat.v1.layers.dense(
            current,
            1,
            activation=None,
            name='{}_prediction'.format(name_prefix),
            reuse=reuse)

        # variables created by this call (empty when the network is reused)
        model_variables = tf.compat.v1.trainable_variables()[variable_count:]
        if reuse:
            assert len(model_variables) == 0
        else:
            self._reuse = True
        return value_estimation, model_variables
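The variable_count/reuse bookkeeping lets _create_network be called repeatedly while creating the weights only once: the first call builds the layers and returns the new variables, and every later call reuses them and must add nothing. A hypothetical usage sketch (estimator, states, and goals are assumed names):

value_1, model_variables = estimator._create_network(states, goals)
# The second call reuses the existing weights, so no new variables appear.
value_2, no_new_variables = estimator._create_network(states, goals)
assert len(no_new_variables) == 0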
Example #3
    def __init__(self, in_channels, out_channels,
                 activation='relu',
                 normalize_emb=False,
                 aggr='mean'):
        super(GraphSage2, self).__init__(aggr=aggr)

        self.in_channels = in_channels
        self.out_channels = out_channels

        # message_lin consumes the concatenation [x_i || x_j]
        self.message_lin = nn.Linear(2 * in_channels, out_channels)
        # agg_lin combines a node's own features with its aggregated messages
        self.agg_lin = nn.Linear(in_channels + out_channels, out_channels)
        self.message_activation = get_activation(activation)
        self.normalize_emb = normalize_emb
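The constructor above only wires up the layers; the forward pass is not part of the snippet. Given the input widths of message_lin (2 * in_channels) and agg_lin (in_channels + out_channels), a plausible sketch of the companion methods under PyTorch Geometric's MessagePassing API follows (an assumption, not the original code; the methods belong inside GraphSage2 but are shown at module level for brevity):

import torch
import torch.nn.functional as F

def forward(self, x, edge_index):
    # Delegate neighborhood aggregation to MessagePassing.propagate.
    return self.propagate(edge_index, x=x)

def message(self, x_i, x_j):
    # message_lin consumes [x_i || x_j], matching its 2 * in_channels input.
    return self.message_activation(self.message_lin(torch.cat((x_i, x_j), dim=-1)))

def update(self, aggr_out, x):
    # agg_lin consumes [x || aggregated messages]: in_channels + out_channels.
    out = self.agg_lin(torch.cat((x, aggr_out), dim=-1))
    if self.normalize_emb:
        out = F.normalize(out, p=2, dim=-1)
    return out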
Example #4
    def _create_network(self,
                        current_state_inputs,
                        goal_inputs,
                        is_baseline=False):
        if is_baseline:
            name_prefix = '{}_baseline'.format(self.name_prefix)
            reuse = False
        else:
            name_prefix = self.name_prefix
            reuse = self._reuse
        variable_count = len(tf.compat.v1.trainable_variables())
        activation = get_activation(self.config['policy']['activation'])
        network_layers = self.config['policy']['layers']
        learn_std = self.config['policy']['learn_std']

        base_std = self.base_std_variable
        current_input = tf.concat((current_state_inputs, goal_inputs), axis=1)

        current = current_input
        for i, layer_size in enumerate(network_layers):
            current = tf.compat.v1.layers.dense(
                current,
                layer_size,
                activation=activation,
                name='{}_layer_{}'.format(name_prefix, i),
                reuse=reuse,
            )

        if learn_std:
            # predict both the mean and the (pre-softplus) std per action dim
            normal_dist_parameters = tf.compat.v1.layers.dense(
                current,
                self.action_size * 2,
                activation=None,
                name='{}_normal_dist_parameters'.format(name_prefix),
                reuse=reuse,
            )
            split_normal_dist_parameters = tf.split(normal_dist_parameters,
                                                    2,
                                                    axis=1)
            bias = split_normal_dist_parameters[0]
            std = split_normal_dist_parameters[1]
            # softplus keeps the learned std positive; base_std adds an offset
            std = tf.math.softplus(std)
            std = std + base_std
        else:
            normal_dist_parameters = tf.compat.v1.layers.dense(
                current,
                self.action_size,
                activation=None,
                name='{}_normal_dist_parameters'.format(name_prefix),
                reuse=reuse,
            )
            bias = normal_dist_parameters
            std = [base_std] * self.action_size

        if self.config['policy']['bias_activation_is_tanh']:
            bias = tf.tanh(bias)

        bias_towards_goal = self.config['policy']['bias_towards_goal']
        if bias_towards_goal is not None:
            bias = bias + current_state_inputs * (
                1. - bias_towards_goal) + goal_inputs * bias_towards_goal

        distribution = tfp.distributions.MultivariateNormalDiag(loc=bias,
                                                                scale_diag=std)
        model_variables = tf.compat.v1.trainable_variables()[variable_count:]
        if reuse:
            assert len(model_variables) == 0
        elif not is_baseline:
            self._reuse = True
        return distribution, model_variables
Example #5
    def _create_network(self, start_inputs, goal_inputs, is_baseline=False):
        if is_baseline:
            name_prefix = '{}_baseline'.format(self.name_prefix)
            reuse = False
        else:
            name_prefix = self.name_prefix
            reuse = self._reuse
        variable_count = len(tf.compat.v1.trainable_variables())
        activation = get_activation(self.config['policy']['activation'])
        network_layers = self.config['policy']['layers']
        learn_std = self.config['policy']['learn_std']
        distance_adaptive_std = self.config['policy']['distance_adaptive_std']

        base_std = tf.squeeze(
            tf.tile(tf.reshape(self.base_std_variable, (1, 1)),
                    (self.state_size, 1)),
            axis=-1)
        if distance_adaptive_std:
            # if the std is distance adaptive, the base std is of the form:
            # (base_std_from_config + softplus(learnable_parameters)) * dist(start, goal)
            learnable_distance_coeff = tf.compat.v1.layers.dense(
                tf.ones((1, 1)), self.state_size, activation=tf.nn.softplus,
                name='{}_std_coeff'.format(name_prefix),
                reuse=reuse, use_bias=False
            )

            # with tf.variable_scope("std_scope", reuse=reuse):
            #     learnable_distance_coeff = tf.nn.softplus(
            #         tf.Variable([0.0]*self.state_size, trainable=True, shape=self.state_size)
            #     )
            base_std = base_std + learnable_distance_coeff
            distance = tf.linalg.norm(start_inputs - goal_inputs, axis=1)
            base_std = tf.expand_dims(distance, axis=1) * base_std
        current_input = tf.concat((start_inputs, goal_inputs), axis=1)
        shift = (start_inputs + goal_inputs) * 0.5
        if self.config['policy']['include_middle_state_as_input']:
            current_input = tf.concat((current_input, shift), axis=1)

        current = current_input
        for i, layer_size in enumerate(network_layers):
            current = tf.compat.v1.layers.dense(
                current, layer_size, activation=activation,
                name='{}_layer_{}'.format(name_prefix, i), reuse=reuse,
            )

        if learn_std:
            normal_dist_parameters = tf.compat.v1.layers.dense(
                current, self.state_size * 2, activation=None,
                name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
            )
            split_normal_dist_parameters = tf.split(normal_dist_parameters, 2, axis=1)
            bias = split_normal_dist_parameters[0]
            std = split_normal_dist_parameters[1]
            std = tf.math.softplus(std)
            std = std + base_std
        else:
            normal_dist_parameters = tf.compat.v1.layers.dense(
                current, self.state_size, activation=None,
                name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
            )
            bias = normal_dist_parameters
            std = base_std

        if self.config['policy']['bias_activation_is_tanh']:
            bias = tf.tanh(bias)

        if self.config['policy']['bias_around_midpoint']:
            bias = bias + shift

        distribution = tfp.distributions.MultivariateNormalDiag(loc=bias, scale_diag=std)
        model_variables = tf.compat.v1.trainable_variables()[variable_count:]
        if reuse:
            assert len(model_variables) == 0
        elif not is_baseline:
            self._reuse = True
        return distribution, model_variables
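Both policy variants return a tfp.distributions.MultivariateNormalDiag, so callers can sample actions and score them under the policy. A hypothetical usage sketch (policy, start, and goal are assumed names):

distribution, model_variables = policy._create_network(start, goal)
sample = distribution.sample()            # draw one action/state per batch row
log_prob = distribution.log_prob(sample)  # log-density for policy-gradient losses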