def _build_module(self, input_layer):
    """Build the future-measurements prediction head on top of ``input_layer``.

    The layout follows the dueling-network head, except that a vector of
    future measurements is predicted for each action.  Two towers are fed
    from ``input_layer``: an expectation stream (E) and a per-action stream
    (A).  ``ng.mean`` of the action stream is subtracted from it, and the
    sum of both streams is stored in ``self.output``.

    NOTE(review): ``output_axis`` is never assigned inside this method, so
    it has to resolve from an enclosing scope; if no such name exists the
    ``ng.expand_dims`` call below raises ``NameError`` -- confirm.
    NOTE(review): the mean subtraction is placed inside the
    ``action_stream`` name scope here; the extent of that scope should be
    confirmed against the intended graph naming.
    """
    # One prediction = every measurement at every predicted future step.
    prediction_width = self.measurements_size[0] * self.num_predicted_steps_ahead

    # Expectation stream (E): one prediction vector shared by all actions.
    with name_scope("expectation_stream"):
        expectation_layers = [
            neon.Affine(nout=256,
                        activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(nout=prediction_width,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
        ]
        expectation = neon.Sequential(expectation_layers)(input_layer)

    # Action stream (A): per-action fine differences around the expectation.
    with name_scope("action_stream"):
        action_layers = [
            neon.Affine(nout=256,
                        activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(nout=self.num_actions * prediction_width,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Reshape((self.num_actions, prediction_width)),
        ]
        raw_actions = neon.Sequential(action_layers)(input_layer)
        centered_actions = raw_actions - ng.mean(raw_actions)

    # Take index 0 along the expectation's first axis, then add
    # ``output_axis`` at position 0 so it can be summed with the action stream.
    leading_axis = expectation.axes[0]
    broadcast_expectation = ng.expand_dims(
        ng.slice_along_axis(expectation, leading_axis, 0),
        output_axis,
        0,
    )

    # Merge both streams into the future-measurements predictions.
    self.output = broadcast_expectation + centered_actions
def _build_module(self, input_layer):
    """Build a dueling-network head on top of ``input_layer``.

    A state-value tower (V) and an action-advantage tower (A) are both fed
    from ``input_layer``.  ``ng.mean`` of the advantages is subtracted from
    them, the state value is expanded along the ``q_values`` axis, and the
    sum of the two is stored in ``self.output`` as the state-action value
    function Q.
    """
    # Axis carrying one entry per action.
    q_axis = ng.make_axis(self.num_actions, name='q_values')

    # State value tower - V
    value_layers = [
        neon.Affine(nout=256,
                    activation=neon.Rectlin(),
                    weight_init=self.weights_init,
                    bias_init=self.biases_init),
        neon.Affine(nout=1,
                    weight_init=self.weights_init,
                    bias_init=self.biases_init),
    ]
    value = neon.Sequential(value_layers)(input_layer)

    # Action advantage tower - A
    advantage_layers = [
        neon.Affine(nout=256,
                    activation=neon.Rectlin(),
                    weight_init=self.weights_init,
                    bias_init=self.biases_init),
        neon.Affine(axes=q_axis,
                    weight_init=self.weights_init,
                    bias_init=self.biases_init),
    ]
    raw_advantage = neon.Sequential(advantage_layers)(input_layer)
    centered_advantage = raw_advantage - ng.mean(raw_advantage)

    # Drop the value tower's first axis (index 0), then re-introduce the
    # q_values axis at position 0 so V lines up with the advantages.
    value_without_unit_axis = ng.slice_along_axis(value, value.axes[0], 0)
    value_per_action = ng.expand_dims(value_without_unit_axis, q_axis, 0)

    # Merge to the state-action value function Q.
    self.output = value_per_action + centered_advantage
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
    """Initialise the embedder by forwarding all arguments to ``InputEmbedder``.

    :param input_size: forwarded unchanged to ``InputEmbedder.__init__``
    :param batch_size: forwarded unchanged (defaults to ``None``)
    :param activation_function: forwarded unchanged (defaults to a
        ``neon.Rectlin`` instance created once, at definition time)
    :param name: forwarded unchanged (defaults to ``"embedder"``)
    """
    # No extra state is set here; the base initialiser does all the work.
    base_args = (input_size, batch_size, activation_function, name)
    InputEmbedder.__init__(self, *base_args)
def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
    """Store the configuration of a middleware embedder.

    :param activation_function: activation object kept on the instance
        (defaults to a ``neon.Rectlin`` instance created once, at
        definition time)
    :param name: identifier kept on the instance
    """
    # Caller-supplied configuration.
    self.name = name
    self.activation_function = activation_function

    # Parameter initialisers: Glorot for weights, constant for biases.
    self.weights_init = neon.GlorotInit()
    self.biases_init = neon.ConstantInit()

    # Both graph endpoints start unset.
    self.input = self.output = None
def model(action_axes):
    """Return a ``neon.Sequential`` of four batch-normalised ``Affine`` layers.

    Three hidden layers (20 outputs each, ``Tanh`` activation) are followed
    by an output layer laid out on ``action_axes`` with ``Rectlin``
    activation.  Every layer uses its own ``XavierInit`` weight initialiser.

    :param action_axes: axes passed as ``axes`` to the final ``Affine`` layer
    :return: the assembled ``neon.Sequential`` container
    """
    hidden_count = 3
    hidden_layers = [
        neon.Affine(
            nout=20,
            weight_init=neon.XavierInit(),
            activation=neon.Tanh(),
            batch_norm=True,
        )
        for _ in range(hidden_count)
    ]

    output_layer = neon.Affine(
        axes=action_axes,
        weight_init=neon.XavierInit(),
        activation=neon.Rectlin(),
        batch_norm=True,
    )

    return neon.Sequential(hidden_layers + [output_layer])
def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
    """Store the configuration of an input embedder.

    :param input_size: kept on the instance as ``input_size``
    :param batch_size: kept on the instance as ``batch_size``
        (defaults to ``None``)
    :param activation_function: activation object kept on the instance
        (defaults to a ``neon.Rectlin`` instance created once, at
        definition time)
    :param name: identifier kept on the instance
    """
    # Caller-supplied configuration.
    self.name = name
    self.input_size, self.batch_size = input_size, batch_size
    self.activation_function = activation_function

    # Parameter initialisers: Glorot for weights, constant for biases.
    self.weights_init = neon.GlorotInit()
    self.biases_init = neon.ConstantInit()

    # Both graph endpoints start unset.
    self.input = self.output = None
def model(action_axes):
    """
    Build the observation-to-action model used by the dqn agent.

    The returned ``neon.Sequential`` divides its input by 255.0, applies
    three ``Rectlin`` convolutions, a 512-output ``Rectlin`` affine layer,
    and finally an affine layer without activation whose ``axes`` is the
    one-element tuple ``(action_axes, )``.
    """
    # (shape tuple, stride) for each convolution, in application order.
    conv_specs = (
        ((8, 8, 32), 4),
        ((4, 4, 64), 2),
        ((3, 3, 64), 1),
    )

    layers = [neon.Preprocess(lambda x: x / 255.0)]
    layers.extend(
        neon.Convolution(
            shape,
            neon.XavierInit(),
            strides=stride,
            activation=neon.Rectlin(),
        )
        for shape, stride in conv_specs
    )
    layers.append(
        neon.Affine(
            nout=512,
            weight_init=neon.XavierInit(),
            bias_init=neon.ConstantInit(),
            activation=neon.Rectlin(),
        )
    )
    layers.append(
        neon.Affine(
            weight_init=neon.XavierInit(),
            bias_init=neon.ConstantInit(),
            activation=None,
            axes=(action_axes, ),
        )
    )
    return neon.Sequential(layers)
def baselines_model(action_axes):
    """Return a two-layer ``neon.Sequential`` model.

    A 64-output ``Rectlin`` affine layer (batch norm disabled) feeds an
    affine layer laid out on ``action_axes`` with no activation.  Both
    layers use ``XavierInit`` weights and ``ConstantInit`` biases.

    :param action_axes: axes passed as ``axes`` to the final ``Affine`` layer
    :return: the assembled ``neon.Sequential`` container
    """
    hidden_layer = neon.Affine(
        nout=64,
        weight_init=neon.XavierInit(),
        bias_init=neon.ConstantInit(),
        activation=neon.Rectlin(),
        batch_norm=False,
    )
    output_layer = neon.Affine(
        axes=action_axes,
        weight_init=neon.XavierInit(),
        bias_init=neon.ConstantInit(),
        activation=None,
    )
    return neon.Sequential([hidden_layer, output_layer])