def __init__(
        self,
        pre_graph_builder,
        node_dim,
        conv_type='GSage',
        num_conv_layers=3,
        hidden_activation=None,
        output_activation=None,
):
    super(GNNNet, self).__init__()
    # graph builder: turns raw observations into node features
    self.pre_graph_builder = pre_graph_builder
    # convs: stack of graph convolution layers
    self.node_input_dim = pre_graph_builder.output_dim
    self.node_dim = node_dim
    self.conv_type = conv_type
    self.num_conv_layers = num_conv_layers
    self.convs = self.build_convs(self.node_input_dim, self.node_dim, self.num_conv_layers)
    # one activation module per conv layer, plus one for the output
    self.hidden_activations = nn.ModuleList([
        get_activation(hidden_activation) for _ in range(num_conv_layers)
    ])
    self.output_activation = get_activation(output_activation)
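# `get_activation` is used throughout this section but not defined in it; a
# minimal sketch of what such a helper might look like on the PyTorch side
# (the mapping below is an assumption, not the repo's actual implementation):
import torch.nn as nn

def get_activation(activation):
    # None maps to identity so layers without an activation still compose
    if activation is None:
        return nn.Identity()
    if isinstance(activation, str):
        return {'relu': nn.ReLU(), 'tanh': nn.Tanh(), 'sigmoid': nn.Sigmoid()}[activation.lower()]
    return activation  # assume a module was passed in directly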
def _create_network(self, current_state_inputs, goal_inputs):
    reuse = self._reuse
    name_prefix = self.name_prefix
    variable_count = len(tf.compat.v1.trainable_variables())
    activation = get_activation(self.config['value_estimator']['activation'])
    network_layers = self.config['value_estimator']['layers']
    # the value network conditions on the current state concatenated with the goal
    current_input = tf.concat((current_state_inputs, goal_inputs), axis=1)
    current = current_input
    for i, layer_size in enumerate(network_layers):
        current = tf.compat.v1.layers.dense(
            current, layer_size, activation=activation,
            name='{}_layer_{}'.format(name_prefix, i), reuse=reuse,
        )
    # linear head producing the scalar value estimate
    value_estimation = tf.compat.v1.layers.dense(
        current, 1, activation=None,
        name='{}_prediction'.format(name_prefix), reuse=reuse,
    )
    # collect only the variables created by this call
    model_variables = tf.compat.v1.trainable_variables()[variable_count:]
    if reuse:
        # a reusing call must not have created any new variables
        assert len(model_variables) == 0
    else:
        self._reuse = True
    return value_estimation, model_variables
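# How a caller might exercise the reuse bookkeeping above (a hypothetical
# sketch: `estimator` and the input shapes are assumptions, not from this
# repo). The first call builds the variables; later calls must reuse them
# and create none.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # this is TF1-style graph code
states = tf.compat.v1.placeholder(tf.float32, shape=(None, 4))
goals = tf.compat.v1.placeholder(tf.float32, shape=(None, 4))
value, model_variables = estimator._create_network(states, goals)  # creates variables
value_again, empty = estimator._create_network(states, goals)      # reuse path, `empty` == []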
def __init__(self, in_channels, out_channels, activation='relu',
             normalize_emb=False, aggr='mean'):
    super(GraphSage2, self).__init__(aggr=aggr)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.message_lin = nn.Linear(2 * in_channels, out_channels)
    self.agg_lin = nn.Linear(in_channels + out_channels, out_channels)
    self.message_activation = get_activation(activation)
    self.normalize_emb = normalize_emb
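# The layer shapes in __init__ suggest MessagePassing hooks along these lines
# (a sketch assuming torch_geometric's message/update API, not necessarily the
# repo's actual implementation):
import torch
import torch.nn.functional as F

def message(self, x_i, x_j):
    # messages are built from the concatenated endpoint features:
    # 2 * in_channels -> out_channels, then the message activation
    return self.message_activation(self.message_lin(torch.cat([x_i, x_j], dim=-1)))

def update(self, aggr_out, x):
    # combine each node's own features with its aggregated messages:
    # (in_channels + out_channels) -> out_channels
    out = self.agg_lin(torch.cat([x, aggr_out], dim=-1))
    if self.normalize_emb:
        out = F.normalize(out, p=2, dim=-1)  # optional l2 normalization
    return out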
def _create_network(self, current_state_inputs, goal_inputs, is_baseline=False):
    if is_baseline:
        # the baseline network gets its own variables and is never reused
        name_prefix = '{}_baseline'.format(self.name_prefix)
        reuse = False
    else:
        name_prefix = self.name_prefix
        reuse = self._reuse
    variable_count = len(tf.compat.v1.trainable_variables())
    activation = get_activation(self.config['policy']['activation'])
    network_layers = self.config['policy']['layers']
    learn_std = self.config['policy']['learn_std']
    base_std = self.base_std_variable
    # the policy conditions on the current state concatenated with the goal
    current_input = tf.concat((current_state_inputs, goal_inputs), axis=1)
    current = current_input
    for i, layer_size in enumerate(network_layers):
        current = tf.compat.v1.layers.dense(
            current, layer_size, activation=activation,
            name='{}_layer_{}'.format(name_prefix, i), reuse=reuse,
        )
    if learn_std:
        # predict both the mean and the (softplus-transformed) std
        normal_dist_parameters = tf.compat.v1.layers.dense(
            current, self.action_size * 2, activation=None,
            name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
        )
        split_normal_dist_parameters = tf.split(normal_dist_parameters, 2, axis=1)
        bias = split_normal_dist_parameters[0]
        std = split_normal_dist_parameters[1]
        # softplus keeps the std positive; base_std acts as a floor
        std = tf.math.softplus(std)
        std = std + base_std
    else:
        # predict the mean only and use the fixed base std for every dimension
        normal_dist_parameters = tf.compat.v1.layers.dense(
            current, self.action_size, activation=None,
            name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
        )
        bias = normal_dist_parameters
        std = [base_std] * self.action_size
    if self.config['policy']['bias_activation_is_tanh']:
        bias = tf.tanh(bias)
    bias_towards_goal = self.config['policy']['bias_towards_goal']
    if bias_towards_goal is not None:
        # shift the mean towards an interpolation between the state and the goal
        bias = bias + current_state_inputs * (1. - bias_towards_goal) + goal_inputs * bias_towards_goal
    distribution = tfp.distributions.MultivariateNormalDiag(loc=bias, scale_diag=std)
    model_variables = tf.compat.v1.trainable_variables()[variable_count:]
    if reuse:
        # a reusing call must not have created any new variables
        assert len(model_variables) == 0
    elif not is_baseline:
        self._reuse = True
    return distribution, model_variables
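# A small numeric check of the goal-interpolation shift above (NumPy stand-in,
# illustrative values): with bias_towards_goal = 0.75 the predicted mean is
# offset towards the point three quarters of the way from the state to the goal.
import numpy as np

state, goal = np.array([0.0, 0.0]), np.array([1.0, 2.0])
network_bias = np.array([0.1, -0.1])
bias_towards_goal = 0.75
loc = network_bias + state * (1. - bias_towards_goal) + goal * bias_towards_goal
print(loc)  # [0.85 1.4 ], i.e. network_bias + [0.75, 1.5]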
def _create_network(self, start_inputs, goal_inputs, is_baseline=False):
    if is_baseline:
        # the baseline network gets its own variables and is never reused
        name_prefix = '{}_baseline'.format(self.name_prefix)
        reuse = False
    else:
        name_prefix = self.name_prefix
        reuse = self._reuse
    variable_count = len(tf.compat.v1.trainable_variables())
    activation = get_activation(self.config['policy']['activation'])
    network_layers = self.config['policy']['layers']
    learn_std = self.config['policy']['learn_std']
    distance_adaptive_std = self.config['policy']['distance_adaptive_std']
    # replicate the scalar base std across all state dimensions
    base_std = tf.squeeze(
        tf.tile(tf.reshape(self.base_std_variable, (1, 1)), (self.state_size, 1)),
        axis=-1,
    )
    if distance_adaptive_std:
        # if the std is distance adaptive, the base std is of the form:
        # (base_std_from_config + softplus(learnable_parameters)) * dist(start, goal)
        learnable_distance_coeff = tf.compat.v1.layers.dense(
            tf.ones((1, 1)), self.state_size, activation=tf.nn.softplus,
            name='{}_std_coeff'.format(name_prefix), reuse=reuse, use_bias=False,
        )
        base_std = base_std + learnable_distance_coeff
        distance = tf.linalg.norm(start_inputs - goal_inputs, axis=1)
        base_std = tf.expand_dims(distance, axis=1) * base_std
    current_input = tf.concat((start_inputs, goal_inputs), axis=1)
    # midpoint between start and goal, optionally fed as an extra input
    shift = (start_inputs + goal_inputs) * 0.5
    if self.config['policy']['include_middle_state_as_input']:
        current_input = tf.concat((current_input, shift), axis=1)
    current = current_input
    for i, layer_size in enumerate(network_layers):
        current = tf.compat.v1.layers.dense(
            current, layer_size, activation=activation,
            name='{}_layer_{}'.format(name_prefix, i), reuse=reuse,
        )
    if learn_std:
        # predict both the mean and the (softplus-transformed) std
        normal_dist_parameters = tf.compat.v1.layers.dense(
            current, self.state_size * 2, activation=None,
            name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
        )
        split_normal_dist_parameters = tf.split(normal_dist_parameters, 2, axis=1)
        bias = split_normal_dist_parameters[0]
        std = split_normal_dist_parameters[1]
        # softplus keeps the std positive; base_std acts as a floor
        std = tf.math.softplus(std)
        std = std + base_std
    else:
        # predict the mean only and keep the (possibly distance-scaled) base std
        normal_dist_parameters = tf.compat.v1.layers.dense(
            current, self.state_size, activation=None,
            name='{}_normal_dist_parameters'.format(name_prefix), reuse=reuse,
        )
        bias = normal_dist_parameters
        std = base_std
    if self.config['policy']['bias_activation_is_tanh']:
        bias = tf.tanh(bias)
    if self.config['policy']['bias_around_midpoint']:
        # center the predicted mean around the start-goal midpoint
        bias = bias + shift
    distribution = tfp.distributions.MultivariateNormalDiag(loc=bias, scale_diag=std)
    model_variables = tf.compat.v1.trainable_variables()[variable_count:]
    if reuse:
        # a reusing call must not have created any new variables
        assert len(model_variables) == 0
    elif not is_baseline:
        self._reuse = True
    return distribution, model_variables
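# Numeric sketch of the distance-adaptive std above (NumPy stand-in,
# illustrative values): the per-dimension std scales linearly with the
# start-goal distance, so distant pairs get a wider proposal distribution.
import numpy as np

base_std = 0.05 + np.log1p(np.exp(0.0))  # base_std_from_config + softplus(0.0)
start, goal = np.array([0.0, 0.0]), np.array([3.0, 4.0])
distance = np.linalg.norm(start - goal)  # 5.0
std = distance * base_std                # grows with how far the goal is
print(std)  # ~3.72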