Exemple #1
0
    def build_graph(self, input_placeholder):
        """Build the network body and return its output node.

        The configured input is flattened and fed through one Dense layer for
        every entry of ``hidden_sizes`` except the last.  The final stage is
        an LSTM when ``use_lstm`` is set, otherwise one more Dense layer; its
        width is the last entry of ``hidden_sizes``, or the width of the
        preceding layer when ``hidden_sizes`` is empty.

        Stores ``self.ph_state`` and ``self.weight``; in the LSTM case also
        stores the LSTM handles in ``self.lstm_items``.

        :param input_placeholder: passed through to ``layer.ConfiguredInput``
            as its ``input_placeholder`` argument.
        :return: ``node`` of the final stage.
        """
        net_input = layer.ConfiguredInput(dppo_config.config.input,
                                          input_placeholder=input_placeholder)
        weighted = [net_input]

        hidden_sizes = dppo_config.config.hidden_sizes
        act_fn = layer.get_activation(dppo_config.config)

        # All hidden sizes but the last become plain Dense layers here; the
        # last one is reserved for the LSTM / closing Dense layer below.
        flat = layer.Flatten(net_input)
        dense_specs = [{"type": layer.Dense, "size": width, "activation": act_fn}
                       for width in hidden_sizes[:-1]]
        trunk = layer.GenericLayers(flat, dense_specs)
        weighted.append(trunk)

        # Fall back to the trunk's own output width when no sizes are given.
        trunk_width = trunk.node.shape.as_list()[-1]
        final_width = hidden_sizes[-1] if len(hidden_sizes) > 0 else trunk_width

        if not dppo_config.config.use_lstm:
            head = layer.Dense(trunk, final_width, activation=act_fn)
            weighted.append(head)
        else:
            # The LSTM consumes the trunk output with an extra leading axis;
            # its output is reshaped back to rows of ``final_width``.
            recurrent = layer.lstm(dppo_config.config.lstm_type,
                                   graph.Expand(trunk, 0),
                                   n_units=final_width,
                                   n_cores=dppo_config.config.lstm_num_cores)
            head = graph.Reshape(recurrent, [-1, final_width])
            weighted.append(recurrent)

            self.lstm_items = {
                "ph_lstm_state": recurrent.ph_state,
                "lstm_zero_state": recurrent.zero_state,
                "lstm_state": recurrent.state,
                "lstm_reset_timestep": recurrent.reset_timestep,
            }

        self.ph_state = net_input.ph_state
        self.weight = layer.Weights(*weighted)
        return head.node
Exemple #2
0
    def build_graph(self):
        """Build the actor network.

        The configured input is flattened, passed through one ReLU Dense
        layer per entry of ``cfg.config.hidden_sizes`` and finished with a
        ``layer.DDPGActor`` built from ``cfg.config.output``.

        Stores ``self.ph_state`` (input placeholder), ``self.actor`` (the
        actor's ``scaled_out``) and ``self.weights``.
        """
        net_input = layer.ConfiguredInput(cfg.config.input)

        flat = layer.Flatten(net_input)
        hidden_specs = [
            {"type": layer.Dense, "size": width, "activation": layer.Activation.Relu}
            for width in cfg.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_specs)

        actor_head = layer.DDPGActor(hidden, cfg.config.output)

        self.ph_state = net_input.ph_state
        self.actor = actor_head.scaled_out
        self.weights = layer.Weights(net_input, hidden, actor_head)
Exemple #3
0
    def build_graph(self):
        """Build the policy network and return its output node.

        The configured input is flattened, passed through one ReLU Dense
        layer per entry of ``pg_config.config.hidden_sizes`` and finished
        with a softmax Dense layer of ``action_size`` units.

        Stores ``self.state`` (input placeholder) and ``self.weights``.

        :return: ``node`` of the softmax layer.
        """
        net_input = layer.ConfiguredInput(pg_config.config.input)

        flat = layer.Flatten(net_input)
        hidden_specs = [
            {"type": layer.Dense, "size": width, "activation": layer.Activation.Relu}
            for width in pg_config.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_specs)

        n_actions = pg_config.config.output.action_size
        policy = layer.Dense(hidden, n_actions, activation=layer.Activation.Softmax)

        self.state = net_input.ph_state
        self.weights = layer.Weights(net_input, hidden, policy)
        return policy.node
Exemple #4
0
    def build_graph(self):
        """Build the value-function network together with its training ops.

        The flattened input is concatenated with a one-column timestep
        placeholder, passed through the configured Dense stack and reduced to
        a single value output.  The loss is the mean squared error against
        the target placeholder plus an L2 penalty (coefficient 1e-3) over all
        network weights.

        Exposes ``self.op_value``, ``self.op_get_weights_flatten``,
        ``self.op_set_weights_flatten``, ``self.op_compute_loss_and_gradient``
        and ``self.op_losses``.
        """
        net_input = layer.ConfiguredInput(trpo_config.config.input)
        # One extra input feature carries the timestep.
        ph_timestep = graph.Placeholder(np.float32, shape=[None, 1])
        state = (net_input.ph_state, ph_timestep)

        flat = layer.Flatten(net_input)
        features = graph.Concat([flat, ph_timestep], axis=1)

        act_fn = layer.Activation.get_activation(trpo_config.config.activation)
        hidden_specs = [
            {"type": layer.Dense, "size": width, "activation": act_fn}
            for width in trpo_config.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(features, hidden_specs)
        value = layer.Dense(hidden, 1)

        # Data term: mean squared error between target and predicted value.
        ph_ytarg_ny = graph.Placeholder(np.float32)
        residual = ph_ytarg_ny.node - value.node
        mse = graph.TfNode(tf.reduce_mean(tf.square(residual)))

        weights = layer.Weights(net_input, hidden, value)

        get_flat_weights = graph.GetVariablesFlatten(weights)
        set_flat_weights = graph.SetVariablesFlatten(weights)

        # Regularisation term: scaled sum of squares of every weight tensor.
        squared_norms = [
            tf.reduce_sum(tf.square(variable))
            for variable in utils.Utils.flatten(weights.node)
        ]
        l2 = graph.TfNode(1e-3 * tf.add_n(squared_norms))
        loss = graph.TfNode(l2.node + mse.node)

        gradients = optimizer.Gradients(weights, loss=loss)
        flat_gradients = graph.GetVariablesFlatten(gradients.calculate)

        self.op_value = self.Op(value, state=state)

        self.op_get_weights_flatten = self.Op(get_flat_weights)
        self.op_set_weights_flatten = self.Op(set_flat_weights,
                                              value=set_flat_weights.ph_value)

        # Both loss-related ops take the same feeds.
        feeds = {"state": state, "ytarg_ny": ph_ytarg_ny}
        self.op_compute_loss_and_gradient = self.Ops(loss, flat_gradients, **feeds)
        self.op_losses = self.Ops(loss, mse, l2, **feeds)
    def build_graph(self):
        """Build the shared body with actor and critic heads.

        With ``use_lstm`` set, the flattened input goes straight into an LSTM
        whose unit count is the last entry of ``hidden_sizes`` (or the
        flattened input width when ``hidden_sizes`` is empty), and the LSTM
        handles are stored on ``self``.  Otherwise the flattened input goes
        through one Dense layer per entry of ``hidden_sizes``.

        Stores ``self.ph_state``, ``self.actor``, ``self.critic`` (flattened
        single-unit Dense output), ``self.weights`` (all layers) and
        ``self.actor_weights`` (actor head only).
        """
        net_input = layer.ConfiguredInput(da3c_config.config.input)

        hidden_sizes = da3c_config.config.hidden_sizes
        weighted = [net_input]
        flat = layer.Flatten(net_input)

        # Fall back to the flattened input width when no sizes are given.
        flat_width = flat.node.shape.as_list()[-1]
        final_width = hidden_sizes[-1] if len(hidden_sizes) > 0 else flat_width

        if not da3c_config.config.use_lstm:
            act_fn = layer.get_activation(da3c_config.config)
            hidden_specs = [
                {"type": layer.Dense, "size": width, "activation": act_fn}
                for width in hidden_sizes
            ]
            head = layer.GenericLayers(flat, hidden_specs)
            weighted.append(head)
        else:
            # The LSTM consumes the flattened input with an extra leading
            # axis; its output is reshaped back to rows of ``final_width``.
            recurrent = layer.lstm(da3c_config.config.lstm_type,
                                   graph.Expand(flat, 0),
                                   n_units=final_width,
                                   n_cores=da3c_config.config.lstm_num_cores)
            head = graph.Reshape(recurrent, [-1, final_width])
            weighted.append(recurrent)

            self.ph_lstm_state = recurrent.ph_state
            self.lstm_zero_state = recurrent.zero_state
            self.lstm_state = recurrent.state
            self.lstm_reset_timestep = recurrent.reset_timestep

        actor_head = layer.Actor(head, da3c_config.config.output)
        critic_head = layer.Dense(head, 1)
        weighted += [actor_head, critic_head]

        self.ph_state = net_input.ph_state
        self.actor = actor_head
        self.critic = graph.Flatten(critic_head)
        self.weights = layer.Weights(*weighted)
        self.actor_weights = layer.Weights(actor_head)
Exemple #6
0
    def build_graph(self):
        """Build the critic network.

        The flattened input passes through a first ReLU Dense layer; the
        second layer is a ``layer.DoubleDense`` that also receives the action
        placeholder.  Any hidden sizes beyond the first two add further ReLU
        Dense layers, and a single-unit Dense layer produces the output.

        Stores ``self.ph_action``, ``self.ph_state``, ``self.critic`` and
        ``self.weights``.  At least two hidden sizes are required.
        """
        net_input = layer.ConfiguredInput(cfg.config.input)
        self.ph_action = graph.Placeholder(
            np.float32, (None, cfg.config.output.action_size))

        sizes = cfg.config.hidden_sizes
        assert len(sizes) > 1, 'You need to provide sizes at least for 2 layers'

        relu = layer.Activation.Relu
        state_layer = layer.Dense(layer.Flatten(net_input), sizes[0], relu)
        # The action is merged in at the second layer.
        merged_layer = layer.DoubleDense(state_layer, self.ph_action, sizes[1], relu)
        weighted = [net_input, state_layer, merged_layer]

        extra_sizes = sizes[2:]
        extra_specs = [{"type": layer.Dense, "size": width, "activation": relu}
                       for width in extra_sizes]
        net = layer.GenericLayers(merged_layer, extra_specs)
        # Only register the extra stack when it actually holds layers.
        if len(extra_sizes) > 0:
            weighted.append(net)

        self.critic = layer.Dense(net, 1, init_var=3e-3)
        self.ph_state = net_input.ph_state

        weighted.append(self.critic)
        self.weights = layer.Weights(*weighted)
Exemple #7
0
    def build_graph(self):
        """Build the policy network.

        The flattened input passes through the configured Dense stack.  For a
        continuous output a linear Dense layer of ``action_size`` units is
        wrapped in ``ConcatFixedStd``; otherwise the head is a softmax Dense
        layer of ``action_size`` units.

        Stores ``self.ph_state``, ``self.actor`` and ``self.weights``.
        """
        net_input = layer.ConfiguredInput(trpo_config.config.input)

        act_fn = layer.Activation.get_activation(trpo_config.config.activation)
        flat = layer.Flatten(net_input)
        hidden_specs = [
            {"type": layer.Dense, "size": width, "activation": act_fn}
            for width in trpo_config.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_specs)

        if not trpo_config.config.output.continuous:
            policy = layer.Dense(hidden,
                                 trpo_config.config.output.action_size,
                                 activation=layer.Activation.Softmax)
            head_layers = [policy]
        else:
            linear_out = layer.Dense(hidden, trpo_config.config.output.action_size)
            policy = ConcatFixedStd(linear_out)
            head_layers = [linear_out, policy]

        self.ph_state = net_input.ph_state
        self.actor = policy
        self.weights = layer.Weights(net_input, hidden, *head_layers)
Exemple #8
0
    def build_graph(self):
        """Build the Q-network, optionally with a dueling head.

        The flattened input passes through one tanh Dense layer per entry of
        ``config.hidden_sizes``.

        * Plain head: one Dense layer of ``action_size`` units.
        * Dueling head (``config.dueling_dqn``): the last hidden layer is
          split column-wise into a value part and an advantage part (both
          streams read the whole input when there are no hidden layers).  A
          single-unit Dense layer and an ``action_size``-unit Dense layer are
          combined as ``V + A - mean(A)``.

        The split hands the first ``width // 2`` columns to the value stream
        and the remaining columns to the advantage stream, so an odd last
        hidden size is split as well instead of being rejected by
        ``tf.split``.  Even sizes are split exactly in half, as before.

        Stores ``self.ph_state``, ``self.output`` and ``self.weights``.
        """
        net_input = layer.ConfiguredInput(config.input)

        hidden = layer.GenericLayers(layer.Flatten(net_input), [
            dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
            for size in config.hidden_sizes
        ])

        weights = [net_input, hidden]

        if config.dueling_dqn:
            if config.hidden_sizes:
                # The split sizes must add up to the full width of the last
                # hidden layer; the advantage stream takes the remainder so
                # that odd widths are covered too.
                last_width = config.hidden_sizes[-1]
                v_width = last_width // 2
                a_width = last_width - v_width
                v_input, a_input = tf.split(hidden.node, [v_width, a_width],
                                            axis=1)

                v_input = graph.TfNode(v_input)
                a_input = graph.TfNode(a_input)
            else:
                v_input, a_input = hidden, hidden

            v_output = layer.Dense(v_input, 1)
            a_output = layer.Dense(a_input, config.output.action_size)

            # Centre the advantages by subtracting their per-row mean.
            # NOTE(review): `keep_dims` is the older TensorFlow spelling of
            # `keepdims`; left as is to match the TF version this file targets.
            output = v_output.node + a_output.node - tf.reduce_mean(
                a_output.node, axis=1, keep_dims=True)
            output = graph.TfNode(output)

            weights.extend([v_output, a_output])
        else:
            output = layer.Dense(hidden, config.output.action_size)
            weights.append(output)

        self.ph_state = net_input.ph_state
        self.output = output
        self.weights = layer.Weights(*weights)