Esempio n. 1
0
    def build_graph(self):
        """Assemble the critic graph from the state input and an action placeholder.

        The flattened input feeds a first ReLU dense layer.  The second layer
        is a ``layer.DoubleDense`` that receives both the first layer and the
        action placeholder.  Hidden sizes past the first two are stacked as
        additional ReLU dense layers, and a final dense layer of size 1 is
        stored as the critic.

        Sets ``self.ph_action``, ``self.ph_state``, ``self.critic`` and
        ``self.weights``.

        Raises:
            AssertionError: if fewer than two hidden sizes are configured.
        """
        state_input = layer.Input(cfg.config.input)
        self.ph_action = graph.Placeholder(
            np.float32, (None, cfg.config.output.action_size))

        hidden_sizes = cfg.config.hidden_sizes
        assert len(hidden_sizes) > 1, \
            'You need to provide sizes at least for 2 layers'

        first = layer.Dense(layer.Flatten(state_input), hidden_sizes[0],
                            layer.Activation.Relu)
        second = layer.DoubleDense(first, self.ph_action, hidden_sizes[1],
                                   layer.Activation.Relu)

        # Everything after the two mandatory layers is optional.
        extra_sizes = hidden_sizes[2:]
        tail = layer.GenericLayers(second, [
            dict(type=layer.Dense, size=width,
                 activation=layer.Activation.Relu)
            for width in extra_sizes
        ])

        self.critic = layer.Dense(tail, 1, init_var=3e-3)
        self.ph_state = state_input.ph_state

        # The optional stack is only tracked when it actually has layers.
        tracked = [state_input, first, second]
        if len(extra_sizes) > 0:
            tracked.append(tail)
        tracked.append(self.critic)
        self.weights = layer.Weights(*tracked)
Esempio n. 2
0
    def build_graph(self):
        """Build the perception layer: one ReLU dense layer over the flattened input.

        Sets ``self.perception``, ``self.weights`` and ``self.ph_state``.
        """
        observation = layer.Input(cfg.input)
        flat = layer.Flatten(observation)

        # Layer width comes from cfg.d (original author's note: d=256).
        self.perception = layer.Dense(
            flat, cfg.d, activation=layer.Activation.Relu)

        self.weights = layer.Weights(observation, self.perception)
        self.ph_state = observation.ph_state
Esempio n. 3
0
    def build_graph(self):
        """Build the policy network and return its output node.

        The flattened input passes through one ReLU dense layer per entry in
        ``pg_config.config.hidden_sizes`` and then a softmax dense layer of
        size ``pg_config.config.output.action_size``.

        Sets ``self.state`` and ``self.weights``.

        Returns:
            The ``node`` attribute of the softmax output layer.
        """
        observation = layer.Input(pg_config.config.input)
        flat = layer.Flatten(observation)

        hidden_descs = [
            dict(type=layer.Dense, size=width,
                 activation=layer.Activation.Relu)
            for width in pg_config.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_descs)

        policy = layer.Dense(hidden, pg_config.config.output.action_size,
                             activation=layer.Activation.Softmax)

        self.state = observation.ph_state
        self.weights = layer.Weights(observation, hidden, policy)
        return policy.node
Esempio n. 4
0
    def build_graph(self):
        """Build the actor network.

        The flattened input passes through one ReLU dense layer per entry in
        ``cfg.config.hidden_sizes`` and the result is handed to
        ``layer.DDPGActor`` together with ``cfg.config.output``.

        Sets ``self.ph_state``, ``self.actor`` (the actor layer's
        ``scaled_out``) and ``self.weights``.
        """
        observation = layer.Input(cfg.config.input)
        flat = layer.Flatten(observation)

        hidden_descs = [
            dict(type=layer.Dense, size=width,
                 activation=layer.Activation.Relu)
            for width in cfg.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_descs)

        policy = layer.DDPGActor(hidden, cfg.config.output)

        self.ph_state = observation.ph_state
        self.actor = policy.scaled_out
        self.weights = layer.Weights(observation, hidden, policy)
Esempio n. 5
0
    def build_graph(self, input_placeholder):
        """Build the shared head network and return its output node.

        Layout: input (with four convolution descriptors when
        ``da3c_config.config.input.universe`` is truthy) -> flatten -> Relu6
        dense layers for all hidden sizes except the last -> either an LSTM
        or one more Relu6 dense layer, sized by the last hidden size.

        Sets ``self.ph_state`` and ``self.weight``; in the LSTM case also
        ``self.ph_lstm_state``, ``self.lstm_zero_state`` and
        ``self.lstm_state``.

        Args:
            input_placeholder: forwarded to ``layer.Input`` under the keyword
                of the same name.

        Returns:
            The ``node`` attribute of the head (reshaped LSTM output or the
            final dense layer).
        """
        conv_desc = {
            'type': layer.Convolution,
            'activation': layer.Activation.Elu,
            'n_filters': 32,
            'filter_size': [3, 3],
            'stride': [2, 2],
            'border': layer.Border.Same,
        }
        if da3c_config.config.input.universe:
            conv_descs = [dict(conv_desc)] * 4
        else:
            conv_descs = None

        observation = layer.Input(da3c_config.config.input,
                                  descs=conv_descs,
                                  input_placeholder=input_placeholder)

        hidden_sizes = da3c_config.config.hidden_sizes
        flat = layer.Flatten(observation)

        # All sizes but the last go into the dense stack; the last one is
        # reserved for the head built below.
        dense_stack = layer.GenericLayers(flat, [
            dict(type=layer.Dense, size=width,
                 activation=layer.Activation.Relu6)
            for width in hidden_sizes[:-1]
        ])
        tracked = [observation, dense_stack]

        # Fall back to the stack's own output width when no sizes are given.
        head_size = dense_stack.node.shape.as_list()[-1]
        if len(hidden_sizes) > 0:
            head_size = hidden_sizes[-1]

        if not da3c_config.config.use_lstm:
            head = layer.Dense(dense_stack, head_size,
                               activation=layer.Activation.Relu6)
            tracked.append(head)
        else:
            recurrent = layer.LSTM(graph.Expand(dense_stack, 0),
                                   n_units=head_size)
            head = graph.Reshape(recurrent, [-1, head_size])
            tracked.append(recurrent)

            self.ph_lstm_state = recurrent.ph_state
            self.lstm_zero_state = recurrent.zero_state
            self.lstm_state = recurrent.state

        self.ph_state = observation.ph_state
        # NOTE(review): the attribute is named `weight` (singular); kept
        # unchanged because code outside this block may read it - confirm.
        self.weight = layer.Weights(*tracked)
        return head.node
Esempio n. 6
0
    def build_graph(self):
        """Build the policy network for continuous or discrete actions.

        The flattened input passes through one Tanh dense layer per entry in
        ``trpo_config.config.hidden_sizes``.  For continuous outputs a dense
        layer without an explicit activation is wrapped in
        ``ConcatFixedStd``; otherwise a softmax dense layer is the actor.

        Sets ``self.ph_state``, ``self.actor`` and ``self.weights``.
        """
        observation = layer.Input(trpo_config.config.input)
        flat = layer.Flatten(observation)

        hidden_descs = [
            dict(type=layer.Dense, size=width,
                 activation=layer.Activation.Tanh)
            for width in trpo_config.config.hidden_sizes
        ]
        hidden = layer.GenericLayers(flat, hidden_descs)

        if not trpo_config.config.output.continuous:
            policy = layer.Dense(hidden,
                                 trpo_config.config.output.action_size,
                                 activation=layer.Activation.Softmax)
            policy_layers = [policy]
        else:
            raw_out = layer.Dense(hidden,
                                  trpo_config.config.output.action_size)
            policy = ConcatFixedStd(raw_out)
            policy_layers = [raw_out, policy]

        self.ph_state = observation.ph_state
        self.actor = policy
        self.weights = layer.Weights(observation, hidden, *policy_layers)
Esempio n. 7
0
    def build_graph(self):
        """Build the actor-critic network.

        Layout: input (with four convolution descriptors when
        ``da3c_config.config.input.universe`` is truthy) -> flatten -> either
        an LSTM fed directly from the flattened input, or a stack of ReLU
        dense layers (one per hidden size) -> actor and critic heads.

        Sets ``self.ph_state``, ``self.actor``, ``self.critic``,
        ``self.weights`` and ``self.actor_weights``; in the LSTM case also
        ``self.ph_lstm_state``, ``self.lstm_zero_state`` and
        ``self.lstm_state``.
        """
        conv_desc = {
            'type': layer.Convolution,
            'activation': layer.Activation.Elu,
            'n_filters': 32,
            'filter_size': [3, 3],
            'stride': [2, 2],
            'border': layer.Border.Same,
        }
        if da3c_config.config.input.universe:
            conv_descs = [dict(conv_desc)] * 4
        else:
            conv_descs = None
        observation = layer.Input(da3c_config.config.input, descs=conv_descs)

        hidden_sizes = da3c_config.config.hidden_sizes
        tracked = [observation]
        flat = layer.Flatten(observation)

        # LSTM width: last hidden size, or the flattened input width when no
        # hidden sizes are configured.
        lstm_units = flat.node.shape.as_list()[-1]
        if len(hidden_sizes) > 0:
            lstm_units = hidden_sizes[-1]

        if not da3c_config.config.use_lstm:
            dense_descs = [
                dict(type=layer.Dense, size=width,
                     activation=layer.Activation.Relu)
                for width in hidden_sizes
            ]
            head = layer.GenericLayers(flat, dense_descs)
            tracked.append(head)
        else:
            recurrent = layer.LSTM(graph.Expand(flat, 0), n_units=lstm_units)
            head = graph.Reshape(recurrent, [-1, lstm_units])
            tracked.append(recurrent)

            self.ph_lstm_state = recurrent.ph_state
            self.lstm_zero_state = recurrent.zero_state
            self.lstm_state = recurrent.state

        policy = layer.Actor(head, da3c_config.config.output)
        value = layer.Dense(head, 1)
        tracked += [policy, value]

        self.ph_state = observation.ph_state
        self.actor = policy
        self.critic = graph.Flatten(value)
        self.weights = layer.Weights(*tracked)
        self.actor_weights = layer.Weights(policy)
Esempio n. 8
0
    def build_graph(self):
        """Build the Q-network, optionally with a dueling head.

        The flattened input passes through one Tanh dense layer per entry in
        ``config.hidden_sizes``.

        * Without ``config.dueling_dqn`` a single dense layer of size
          ``config.output.action_size`` is the output.
        * With ``config.dueling_dqn`` the last hidden layer is split along
          axis 1 into a value part and an advantage part (or used whole for
          both when there are no hidden layers).  The output is
          ``V + A - mean(A)`` with the mean taken over axis 1.

        Sets ``self.ph_state``, ``self.output`` and ``self.weights``.

        Raises:
            ValueError: if dueling is enabled and the last hidden size is
                smaller than 2, so it cannot be split into two non-empty
                streams.
        """
        state_input = layer.Input(config.input)

        hidden = layer.GenericLayers(layer.Flatten(state_input), [
            dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
            for size in config.hidden_sizes
        ])

        weights = [state_input, hidden]

        if config.dueling_dqn:
            if config.hidden_sizes:
                last_size = config.hidden_sizes[-1]
                if last_size < 2:
                    raise ValueError(
                        'Dueling DQN needs the last hidden size to be at '
                        'least 2, got %r' % (last_size,))

                # tf.split requires the sizes to add up to the width of the
                # split axis.  Using `last_size // 2` twice breaks that for
                # odd widths, so the advantage stream takes the remainder.
                # Even widths yield the same two equal halves as before.
                v_size = last_size // 2
                a_size = last_size - v_size
                v_input, a_input = tf.split(
                    hidden.node, [v_size, a_size], axis=1)

                v_input = graph.TfNode(v_input)
                a_input = graph.TfNode(a_input)
            else:
                # No hidden layers: both streams read the same node.
                v_input, a_input = hidden, hidden

            v_output = layer.Dense(v_input, 1)
            a_output = layer.Dense(a_input, config.output.action_size)

            # NOTE(review): `keep_dims` is the older spelling of `keepdims`
            # in tf.reduce_mean; left unchanged to match the TensorFlow
            # version this code targets - confirm before upgrading.
            output = v_output.node + a_output.node - tf.reduce_mean(
                a_output.node, axis=1, keep_dims=True)
            output = graph.TfNode(output)

            weights.extend([v_output, a_output])
        else:
            output = layer.Dense(hidden, config.output.action_size)
            weights.append(output)

        self.ph_state = state_input.ph_state
        self.output = output
        self.weights = layer.Weights(*weights)
Esempio n. 9
0
    def build_graph(self):
        """Build the inverse and forward models and their combined loss.

        The input (four convolution descriptors) is reshaped so that each
        row holds ``2 * last_size`` values, where ``last_size`` is the width
        of the flattened input.  The row is sliced into a first and a second
        half (presumably two successive states - confirm against the caller).

        * Inverse model: dense layers over the whole reshaped row, scored
          against ``self.ph_taken`` with sparse softmax cross-entropy.
        * Forward model: dense layers over the first half concatenated with
          ``self.ph_probs``, scored against the second half with an L2 loss.

        Sets ``self.ph_probs``, ``self.ph_taken``, ``self.ph_state``,
        ``self.rew_out``, ``self.loss`` and ``self.weights``.
        """
        conv_desc = {
            'type': layer.Convolution,
            'activation': layer.Activation.Elu,
            'n_filters': 32,
            'filter_size': [3, 3],
            'stride': [2, 2],
            'border': layer.Border.Same,
        }
        observation = layer.Input(cfg.config.input,
                                  descs=[dict(conv_desc)] * 4)

        n_actions = cfg.config.output.action_size
        self.ph_probs = graph.Placeholder(
            np.float32, shape=[None, n_actions], name='act_probs')
        self.ph_taken = graph.Placeholder(
            np.int32, shape=(None,), name='act_taken')

        flat = layer.Flatten(observation)
        feature_size = flat.node.shape.as_list()[-1]

        # Each row of the reshaped tensor carries two feature vectors.
        paired = graph.Reshape(observation, [-1, feature_size * 2])
        first_half = graph.TfNode(paired.node[:, :feature_size])
        second_half = graph.TfNode(paired.node[:, feature_size:])

        forward_in = graph.Concat([first_half, self.ph_probs], axis=1)

        hidden_width = cfg.config.hidden_sizes[-1]
        inverse_hidden = layer.Dense(paired, hidden_width,
                                     layer.Activation.Relu)
        # No activation here: the output is passed as logits to the
        # cross-entropy loss below.
        inverse_out = layer.Dense(inverse_hidden, n_actions)

        forward_hidden = layer.Dense(forward_in, hidden_width,
                                     layer.Activation.Relu)
        forward_out = layer.Dense(forward_hidden, feature_size)

        inverse_loss = graph.SparseSoftmaxCrossEntropyWithLogits(
            inverse_out, self.ph_taken).op
        forward_loss = graph.L2loss(forward_out.node - second_half.node).op

        # Original author's note: should be even wrt batch_size for now.
        self.ph_state = observation.ph_state
        self.rew_out = graph.TfNode(cfg.config.icm.nu * forward_loss)

        self.loss = graph.TfNode(
            cfg.config.icm.beta * forward_loss
            + (1 - cfg.config.icm.beta) * inverse_loss)

        self.weights = layer.Weights(observation, inverse_hidden, inverse_out,
                                     forward_hidden, forward_out)