def _build(self, attribute_value):
    """Embed an attribute value with a three-layer MLP followed by LayerNorm.

    Histogram summaries are written for both the raw input and the
    resulting embedding.

    Args:
        attribute_value: Tensor of attribute values; it is cast to float32
            before being passed through the network.

    Returns:
        The output tensor of the MLP + LayerNorm stack.
    """
    tf.summary.histogram('cont_attribute_value_histogram', attribute_value)

    width = self._attr_embedding_dim
    embedding_net = snt.Sequential([
        snt.nets.MLP([width] * 3, activate_final=True, use_dropout=True),
        snt.LayerNorm(),
    ])
    float_value = tf.cast(attribute_value, dtype=tf.float32)
    embedding = embedding_net(float_value)

    tf.summary.histogram('cont_embedding_histogram', embedding)
    return embedding
def make_ensemble(num_actions: int,
                  num_ensemble: int = 20,
                  num_hidden_layers: int = 2,
                  num_units: int = 50,
                  prior_scale: float = 3.) -> Sequence[snt.Module]:
    """Build a list of `NetworkWithPrior` ensemble members.

    Each member pairs two freshly constructed Flatten + MLP stacks (a network
    and a prior network) that share the same layer sizes.

    Args:
        num_actions: Size of the final MLP layer.
        num_ensemble: Number of ensemble members to create.
        num_hidden_layers: Number of hidden layers in each MLP.
        num_units: Width of each hidden layer.
        prior_scale: Scale passed to `NetworkWithPrior` for every member.

    Returns:
        A list containing `num_ensemble` `NetworkWithPrior` modules.
    """
    layer_widths = [num_units] * num_hidden_layers + [num_actions]

    def _make_member():
        # The network is built before its prior, once per ensemble member.
        network = snt.Sequential([snt.Flatten(), snt.nets.MLP(layer_widths)])
        prior = snt.Sequential([snt.Flatten(), snt.nets.MLP(layer_widths)])
        return NetworkWithPrior(network, prior, prior_scale)

    return [_make_member() for _ in range(num_ensemble)]
def _build(self, inputs):
    """Compute shared hidden features and feed them to the three output heads.

    Args:
        inputs: Features passed to `util.concat_features` and then to the MLP
            created by `util.make_mlp`.

    Returns:
        A 3-tuple of the results of `_build_game_output`, `_build_score` and
        `_build_game_over`, each applied to the same hidden features.
    """
    hparams = self._hparams
    trunk = snt.Sequential([
        util.concat_features,
        util.make_mlp(
            hparams,
            hparams.obs_decoder_fc_hidden_layers,
            activate_final=True),
    ])
    hidden = trunk(inputs)

    # Heads are built in the same order in which they are returned.
    game_output = self._build_game_output(hidden)
    score = self._build_score(hidden)
    game_over = self._build_game_over(hidden)
    return game_output, score, game_over
def run(bsuite_id: Text) -> Text:
    """Run a DQN agent on one bsuite environment and record the results.

    The environment is loaded through `bsuite.load_and_record`; all agent
    hyper-parameters and recording options are read from `FLAGS`.

    Args:
        bsuite_id: Identifier of the bsuite environment to load.

    Returns:
        The `bsuite_id` that was passed in.
    """
    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    hidden_sizes = [FLAGS.num_units] * FLAGS.num_hidden_layers

    def _make_q_network():
        # Flatten the observation, then an MLP with one output per action.
        output_sizes = hidden_sizes + [env.action_spec().num_values]
        return snt.Sequential([snt.Flatten(), snt.nets.MLP(output_sizes)])

    # Online and target networks have the same architecture but are
    # separate module instances.
    online_network = _make_q_network()
    target_network = _make_q_network()

    agent = dqn.DQNTF2(
        action_spec=env.action_spec(),
        online_network=online_network,
        target_network=target_network,
        batch_size=FLAGS.batch_size,
        discount=FLAGS.discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        optimizer=snt.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        epsilon=FLAGS.epsilon,
        seed=FLAGS.seed,
    )

    # Fall back to the environment's own episode count when the flag is unset.
    num_episodes = FLAGS.num_episodes or env.bsuite_num_episodes  # pytype: disable=attribute-error
    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=num_episodes,
        verbose=FLAGS.verbose)

    return bsuite_id
def _build(self, inpt):
    """Map the input to eight parameters and return them as two groups.

    The input is batch-flattened and passed through an `MLP` with 8 outputs.
    The result is split along the last axis at index 4; the variable
    `scale_offset` (initialised from `self._scale_offset`) is added to the
    second part.

    Args:
        inpt: Input tensor fed to the flatten + MLP stack.

    Returns:
        A 2-tuple `(params[..., :4], params[..., 4:] + scale_offset)`.
    """
    net = snt.Sequential([
        snt.BatchFlatten(),
        MLP(self._n_hidden, n_out=8),
    ])
    params = net(inpt)

    scale_offset = tf.get_variable(
        'scale_offset', initializer=self._scale_offset)

    leading = params[..., :4]
    trailing = params[..., 4:] + scale_offset
    return leading, trailing
def make_value_func_dm_control(
    value_layer_sizes: str = '512,512,256',
    adversarial_layer_sizes: str = '512,512,256',
) -> Tuple[snt.Module, snt.Module]:
    """Build the value network and the adversarial network.

    Both networks share one architecture: a `CriticMultiplexer`, a
    `LayerNormMLP` with the requested hidden sizes, and a final linear layer
    with a single output.

    Args:
        value_layer_sizes: Comma-separated hidden layer sizes for the value
            network.
        adversarial_layer_sizes: Comma-separated hidden layer sizes for the
            adversarial network.

    Returns:
        A `(value_function, adversarial_function)` tuple of Sonnet modules.
    """

    def _parse_sizes(spec):
        return [int(size) for size in spec.split(',')]

    def _make_critic(sizes):
        return snt.Sequential([
            networks.CriticMultiplexer(),
            networks.LayerNormMLP(sizes, activate_final=True),
            snt.Linear(1),
        ])

    # Parse-then-build for each network in turn, value network first.
    value_function = _make_critic(_parse_sizes(value_layer_sizes))
    adversarial_function = _make_critic(_parse_sizes(adversarial_layer_sizes))
    return value_function, adversarial_function
def make_conv_model():
    """Build a small conv net: two 3x3 conv layers, flatten, one dense layer.

    Returns:
        A `snt.Sequential` made of a two-layer `ConvNet2D` (32 channels each,
        stride 1, 'VALID' padding, final activation applied), a
        `BatchFlatten`, and a 256-unit MLP with its final activation applied.
    """
    conv_stack = snt.nets.ConvNet2D(
        output_channels=[32, 32],
        kernel_shapes=[3, 3],
        strides=[1, 1],
        paddings=['VALID', 'VALID'],
        activate_final=True)
    flatten = snt.BatchFlatten()
    dense = snt.nets.MLP([256], activate_final=True)
    return snt.Sequential([conv_stack, flatten, dense])
def _make_network(num_actions: int,
                  torso_layers: Sequence[int] = (5,),
                  head_layers: Sequence[int] = (5,)):
    """Build a network made of an MLP torso followed by a duelling MLP head.

    The defaults are immutable tuples rather than lists so that no mutable
    object is shared between calls.

    Args:
        num_actions: Number of actions passed to the duelling head.
        torso_layers: Layer sizes of the torso MLP.
        head_layers: Hidden layer sizes of the duelling head.

    Returns:
        A `snt.Sequential` chaining the torso MLP and the duelling head.
    """
    network = snt.Sequential([
        # Torso MLP.
        snt.nets.MLP(torso_layers, activate_final=True),
        # Dueling MLP head.
        networks.DuellingMLP(num_actions=num_actions, hidden_sizes=head_layers),
    ])
    return network
def testWarning(self):
    """`get_variables()` on a Sequential returns `()` and logs a warning."""
    layers = [snt.Linear(output_size=23), snt.Linear(output_size=42)]
    seq = snt.Sequential(layers)

    # Connect the module to the graph before querying its variables.
    inputs = tf.placeholder(dtype=tf.float32, shape=[2, 3])
    seq(inputs)

    with mock.patch.object(tf.logging, "warning") as warning_mock:
        variables = seq.get_variables()
        self.assertEqual((), variables)
        self.assertTrue(warning_mock.called)
        # call_args[0] holds the positional arguments of the logging call.
        positional_args = warning_mock.call_args[0]
        self.assertTrue(
            "will always return an empty tuple" in positional_args[0])
def testCopiesModules(self):
    """Sequential keeps its own copy of the layer list it was given."""
    first = snt.Linear(output_size=200)
    last = snt.Linear(output_size=10)
    modules = [first, tf.tanh, last]
    sequential = snt.Sequential(modules)

    # Corrupt the caller's list after construction. If Sequential had kept a
    # reference instead of a copy, connecting it below would raise a
    # TypeError.
    modules[1] = "i'm a string, not a module"

    inputs = tf.placeholder(tf.float32, [23, 42])
    sequential(inputs)
def evaluator(
    self,
    variable_source: acme.VariableSource,
    counter: counting.Counter,
):
    """Build the evaluation process.

    Args:
        variable_source: Source from which the policy variables are fetched.
        counter: Parent counter; it is wrapped in a child counter prefixed
            with 'evaluator'.

    Returns:
        An `acme.EnvironmentLoop` that runs the evaluator actor in a freshly
        created environment.
    """
    spec = self._environment_spec
    action_spec = spec.actions
    observation_spec = spec.observations

    # Environment first, then the networks the evaluator acts with.
    environment = self._environment_factory(True)
    agent_networks = self._network_factory(action_spec)

    # Use the identity when no observation network is provided.
    observation_network = agent_networks.get('observation', tf.identity)

    # Policy: observation network -> policy network -> StochasticMeanHead.
    evaluator_network = snt.Sequential([
        observation_network,
        agent_networks['policy'],
        networks.StochasticMeanHead(),
    ])

    # Variables must exist before they can be handed to the variable client.
    tf2_utils.create_variables(evaluator_network, [observation_spec])
    policy_variables = {'policy': evaluator_network.variables}

    # The client keeps the actor's variables up to date.
    variable_client = tf2_variable_utils.VariableClient(
        variable_source,
        policy_variables,
        update_period=self._variable_update_period)

    # Fetch variables once, blocking, so that evaluation never starts with a
    # randomly initialised actor.
    variable_client.update_and_wait()

    actor = actors.FeedForwardActor(
        policy_network=evaluator_network,
        variable_client=variable_client)

    # Logger, counter and optional observers for the loop.
    counter = counting.Counter(counter, 'evaluator')
    logger = loggers.make_default_logger(
        'evaluator',
        time_delta=self._log_every,
        steps_key='evaluator_steps')
    if self._make_observers:
        observers = self._make_observers()
    else:
        observers = ()

    return acme.EnvironmentLoop(
        environment, actor, counter, logger, observers=observers)
def _build(self, inputs):
    """Upscale the input by 2x spatially, then apply two 3x3 conv layers.

    The static shape is read as `[batch, height, width, channels]`, which is
    the layout `tf.image.resize_nearest_neighbor` expects, and its `size`
    argument is `[new_height, new_width]`. The previous code unpacked the
    two spatial dimensions in the opposite order and therefore swapped
    height and width for non-square inputs; square inputs are unaffected.

    Args:
        inputs: Rank-4 image tensor with statically known spatial dimensions.

    Returns:
        The output of resize -> conv -> leaky ReLU -> conv -> leaky ReLU.
    """
    _, height, width, _ = inputs.get_shape().as_list()
    upscaled_size = [2 * height, 2 * width]

    net = snt.Sequential([
        lambda images: tf.image.resize_nearest_neighbor(images, upscaled_size),
        snt.Conv2D(self._num_filters, 3, padding=snt.SAME),
        tf.nn.leaky_relu,
        snt.Conv2D(self._num_filters, 3, padding=snt.SAME),
        tf.nn.leaky_relu,
    ])
    return net(inputs)
def __init__(self, num_hidden, num_layers, num_output, name='mlp_ode'):
    """Assemble a tanh MLP as a `snt.Sequential`.

    The network has `num_layers - 1` Linear + tanh pairs followed by one
    final Linear layer with no activation.

    Args:
        num_hidden: Output size of every hidden Linear layer.
        num_layers: Total number of Linear layers, counting the output layer.
        num_output: Output size of the final Linear layer.
        name: Module name forwarded to the parent constructor.
    """
    super(MLP_ODE, self).__init__(name=name)
    self._num_hidden = num_hidden
    self._num_output = num_output
    self._num_layers = num_layers

    self._modules = []
    num_hidden_layers = self._num_layers - 1
    for _ in range(num_hidden_layers):
        # One hidden layer: a Linear followed by a tanh non-linearity.
        self._modules.extend([snt.Linear(self._num_hidden), tf.math.tanh])
    self._modules.append(snt.Linear(self._num_output))

    self._model = snt.Sequential(self._modules)
def make_value_func_bsuite(environment_spec: EnvironmentSpec,
                           layer_sizes: str = '50,50',
                           ) -> snt.Module:
    """Build a value network whose actions are one-hot encoded.

    Args:
        environment_spec: Environment spec; `actions.num_values` sets the
            one-hot depth.
        layer_sizes: Comma-separated hidden layer sizes for the MLP.

    Returns:
        A `snt.Sequential` of a `CriticMultiplexer` (with a one-hot action
        network), an MLP with its final activation applied, and a linear
        layer with a single output.
    """
    hidden_sizes = [int(size) for size in layer_sizes.split(',')]

    num_actions = environment_spec.actions.num_values
    one_hot_actions = functools.partial(tf.one_hot, depth=num_actions)

    multiplexer = networks.CriticMultiplexer(action_network=one_hot_actions)
    torso = snt.nets.MLP(hidden_sizes, activate_final=True)
    head = snt.Linear(1)
    return snt.Sequential([multiplexer, torso, head])
def __init__(self, environment_spec: EnvironmentSpec,
             layer_sizes: Sequence[int]):
    """Create the underlying value network.

    Args:
        environment_spec: Environment spec; `actions.num_values` sets the
            depth of the one-hot action encoding.
        layer_sizes: Hidden layer sizes of the MLP.
    """
    super(ValueFunction, self).__init__()

    num_actions = environment_spec.actions.num_values
    one_hot_actions = functools.partial(tf.one_hot, depth=num_actions)

    # Multiplexer with one-hot actions -> MLP -> single linear output.
    multiplexer = networks.CriticMultiplexer(action_network=one_hot_actions)
    torso = snt.nets.MLP(layer_sizes, activate_final=True)
    head = snt.Linear(1)
    self._net = snt.Sequential([multiplexer, torso, head])
def make_mlp_model(latent_size, num_layers):
    """Create a fresh MLP followed by LayerNorm.

    Every call builds new modules, so parameters are not shared between the
    models returned by separate calls.

    Args:
        latent_size: Output size of every MLP layer.
        num_layers: Number of MLP layers.

    Returns:
        A Sonnet module which contains the MLP and LayerNorm.
    """
    layer_sizes = [latent_size] * num_layers
    mlp = snt.nets.MLP(layer_sizes, activate_final=True)
    layer_norm = snt.LayerNorm()
    return snt.Sequential([mlp, layer_norm])
def residual_block(main_channel, residual_hiddens):
    """Build a 3x3 conv -> ReLU -> 1x1 conv -> ReLU stack.

    Args:
        main_channel: Number of output channels of the final 1x1 convolution.
        residual_hiddens: Number of output channels of the first 3x3
            convolution.

    Returns:
        A `snt.Sequential` containing the four layers.
    """
    conv3x3 = snt.Conv2D(
        output_channels=residual_hiddens,
        kernel_shape=(3, 3),
        stride=(1, 1))
    conv1x1 = snt.Conv2D(
        output_channels=main_channel,
        kernel_shape=(1, 1),
        stride=(1, 1))
    return snt.Sequential([conv3x3, tf.nn.relu, conv1x1, tf.nn.relu])
def upscale_conv(num_blocks=5, init_filter_num=256, **network_kwargs):
    """Build a conv stem followed by a chain of `UpscaleBlock`s.

    The stem is a 4x4 conv and a 3x3 conv, each followed by a leaky ReLU and
    using `init_filter_num` filters. The first upscale block also receives
    `init_filter_num` filters, and each later block receives half as many as
    the one before it.

    Args:
        num_blocks: Number of `UpscaleBlock`s appended after the stem.
        init_filter_num: Filter count of the stem and of the first block.
        **network_kwargs: Accepted for interface compatibility; not used
            here.

    Returns:
        A `snt.Sequential` containing the stem and the upscale blocks.
    """
    blocklist = [
        snt.Conv2D(init_filter_num, 4, padding=snt.SAME),
        tf.nn.leaky_relu,
        snt.Conv2D(init_filter_num, 3, padding=snt.SAME),
        tf.nn.leaky_relu,
    ]
    num_filters = init_filter_num
    for _ in range(num_blocks):
        blocklist.append(UpscaleBlock(num_filters))
        # Floor division keeps the filter count an integer on Python 3;
        # `/=` would hand float channel counts to the later blocks.
        num_filters //= 2
    return snt.Sequential(blocklist)