Example #1 (score: 0)
    def _build_module(self, input_layer):
        """Build a Direct-Future-Prediction head on top of *input_layer*.

        Structured like a Dueling Network, but instead of a scalar state value
        and per-action advantages, it predicts the future measurements for
        each action over all predicted time steps.
        """
        # Flattened size of all measurements across every predicted step.
        multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead

        # BUGFIX: output_axis was referenced below without ever being defined
        # in this scope (NameError at graph-construction time). Define the
        # per-action axis the same way the dueling-network variant does.
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # actions expectation tower (expectation stream) - E
        # NOTE(review): `name_scope` is assumed to be imported at module level
        # (e.g. ngraph's name_scope) — confirm against the full file.
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # Center the per-action stream so the expectation stream carries
            # the action-independent component.
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # Broadcast the expectation stream across the action axis so it can be
        # added to the per-action stream.
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
Example #2 (score: 0)
    def _build_module(self, input_layer):
        """Dueling architecture: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))."""
        # One axis entry per discrete action for the Q-value outputs.
        q_axis = ng.make_axis(self.num_actions, name='q_values')

        # V tower: 256-unit ReLU hidden layer followed by a single scalar.
        value_stream = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(nout=1, weight_init=self.weights_init,
                        bias_init=self.biases_init),
        ])(input_layer)

        # A tower: one advantage estimate per action along q_axis.
        raw_advantage = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(axes=q_axis, weight_init=self.weights_init,
                        bias_init=self.biases_init),
        ])(input_layer)
        # Subtract the mean advantage so V and A are identifiable.
        centered_advantage = raw_advantage - ng.mean(raw_advantage)

        # Broadcast the scalar state value across the action axis.
        value_broadcast = ng.expand_dims(
            ng.slice_along_axis(value_stream, value_stream.axes[0], 0),
            q_axis, 0)

        # Merge into the state-action value function Q.
        self.output = value_broadcast + centered_advantage
Example #3 (score: 0)
 def _build_module(self, input_layer):
     """Plain (non-dueling) Q head: a single linear layer, one output per action."""
     q_head = neon.Sequential([
         neon.Affine(nout=self.num_actions, weight_init=self.weights_init,
                     bias_init=self.biases_init)
     ])
     self.output = q_head(input_layer)
def small_model(action_axes):
    """Return a minimal one-layer tanh model whose output spans *action_axes*."""
    output_layer = neon.Affine(
        weight_init=neon.GlorotInit(),
        bias_init=neon.ConstantInit(),
        activation=neon.Tanh(),
        axes=(action_axes, ),
    )
    return neon.Sequential([output_layer])
Example #5 (score: 0)
 def _build_module(self):
     """Single 512-unit fully connected embedder.

     The Sequential is stored un-applied — it is not connected to any input
     within this method.
     """
     dense = neon.Affine(nout=512,
                         weight_init=self.weights_init,
                         bias_init=self.biases_init,
                         activation=self.activation_function)
     self.output = neon.Sequential([dense])
def model(action_axes):
    """Return a 4-layer batch-normalized MLP whose last layer spans *action_axes*."""
    def hidden_layer():
        # 20-unit tanh layer, Xavier-initialized, with batch norm.
        return neon.Affine(nout=20,
                           weight_init=neon.XavierInit(),
                           activation=neon.Tanh(),
                           batch_norm=True)

    output_layer = neon.Affine(axes=action_axes,
                               weight_init=neon.XavierInit(),
                               activation=neon.Rectlin(),
                               batch_norm=True)
    return neon.Sequential(
        [hidden_layer(), hidden_layer(), hidden_layer(), output_layer])
Example #7 (score: 0)
 def _build_module(self):
     """Vector-observation embedder: one 256-unit fully connected layer.

     The Sequential is built but not applied to an input inside this method.
     """
     embedder = neon.Affine(nout=256,
                            weight_init=self.weights_init,
                            bias_init=self.biases_init,
                            activation=self.activation_function)
     self.output = neon.Sequential([embedder])
Example #8 (score: 0)
 def _build_module(self):
     """Image-observation embedder: pixel rescaling followed by three
     convolutions (8x8 stride 4, 4x4 stride 2, 3x3 stride 1)."""
     def conv(fshape, stride):
         # Convolution sharing the module's activation and initializers.
         return neon.Convolution(fshape, strides=stride,
                                 activation=self.activation_function,
                                 filter_init=self.weights_init,
                                 bias_init=self.biases_init)

     self.output = neon.Sequential([
         neon.Preprocess(functor=lambda x: x / self.input_rescaler),
         conv((8, 8, 32), 4),
         conv((4, 4, 64), 2),
         conv((3, 3, 64), 1),
     ])
def baselines_model(action_axes):
    """Return a two-layer MLP: a 64-unit ReLU hidden layer (no batch norm)
    followed by a linear output layer spanning *action_axes*."""
    hidden = neon.Affine(nout=64,
                         weight_init=neon.XavierInit(),
                         bias_init=neon.ConstantInit(),
                         activation=neon.Rectlin(),
                         batch_norm=False)
    head = neon.Affine(axes=action_axes,
                       weight_init=neon.XavierInit(),
                       bias_init=neon.ConstantInit(),
                       activation=None)
    return neon.Sequential([hidden, head])
Example #10 (score: 0)
def model(action_axes):
    """
    Given the expected action axes, return a model mapping from observation to
    action axes for use by the dqn agent.
    """
    # DQN conv trunk specs: (filter shape, stride).
    conv_specs = [((8, 8, 32), 4), ((4, 4, 64), 2), ((3, 3, 64), 1)]

    layers = [neon.Preprocess(lambda x: x / 255.0)]
    for filter_shape, stride in conv_specs:
        layers.append(neon.Convolution(filter_shape,
                                       neon.XavierInit(),
                                       strides=stride,
                                       activation=neon.Rectlin()))
    # 512-unit ReLU hidden layer after the conv stack.
    layers.append(neon.Affine(nout=512,
                              weight_init=neon.XavierInit(),
                              bias_init=neon.ConstantInit(),
                              activation=neon.Rectlin()))
    # Linear output over the action axes.
    layers.append(neon.Affine(weight_init=neon.XavierInit(),
                              bias_init=neon.ConstantInit(),
                              activation=None,
                              axes=(action_axes, )))
    return neon.Sequential(layers)