Beispiel #1
0
    def build_graph(self, input_placeholder):
        """Build the hidden-layer stack on top of the configured input.

        The configured input is flattened and fed through one Dense layer per
        entry of ``hidden_sizes`` except the last.  The head width is the last
        entry of ``hidden_sizes`` or, when that list is empty, the width of the
        stack output.  Depending on ``use_lstm`` the head is either an LSTM
        (reshaped to ``[-1, last_size]``) or one more Dense layer.

        Side effects: sets ``self.ph_state`` and ``self.weight``; when
        ``use_lstm`` is enabled, also sets ``self.lstm_items``.

        Args:
            input_placeholder: forwarded unchanged to ``layer.ConfiguredInput``.

        Returns:
            The ``node`` attribute of the head.
        """
        conf = dppo_config.config
        net_input = layer.ConfiguredInput(conf.input, input_placeholder=input_placeholder)

        hidden_sizes = conf.hidden_sizes
        act = layer.get_activation(conf)

        # All hidden sizes but the last go into the generic Dense stack; the
        # last one is reserved for the head built further down.
        flat = layer.Flatten(net_input)
        dense_descs = [{'type': layer.Dense, 'size': width, 'activation': act}
                       for width in hidden_sizes[:-1]]
        fc_layers = layer.GenericLayers(flat, dense_descs)

        # The stack width is always read, but only used as the head width when
        # no hidden sizes are configured.
        stack_width = fc_layers.node.shape.as_list()[-1]
        last_size = hidden_sizes[-1] if len(hidden_sizes) > 0 else stack_width

        if not conf.use_lstm:
            head = layer.Dense(fc_layers, last_size, activation=act)
            tail = head
        else:
            lstm = layer.lstm(conf.lstm_type,
                              graph.Expand(fc_layers, 0), n_units=last_size,
                              n_cores=conf.lstm_num_cores)
            head = graph.Reshape(lstm, [-1, last_size])
            # The LSTM itself (not the reshaped head) is handed to Weights.
            tail = lstm

            self.lstm_items = dict(ph_lstm_state=lstm.ph_state,
                                   lstm_zero_state=lstm.zero_state,
                                   lstm_state=lstm.state,
                                   lstm_reset_timestep=lstm.reset_timestep)

        self.ph_state = net_input.ph_state
        self.weight = layer.Weights(net_input, fc_layers, tail)
        return head.node
Beispiel #2
0
    def build_graph(self, input_placeholder):
        """Build the hidden-layer stack on top of an optionally convolutional input.

        When ``input.universe`` is set, the input layer receives four
        convolution descriptions (Elu, 32 filters, 3x3, stride 2, same border);
        otherwise it receives ``None``.  The input is flattened and passed
        through one Relu6 Dense layer per entry of ``hidden_sizes`` except the
        last.  The head (width: last hidden size, or the stack output width if
        no sizes are configured) is an LSTM when ``use_lstm`` is set, else a
        Relu6 Dense layer.

        Side effects: sets ``self.ph_state`` and ``self.weight``; when
        ``use_lstm`` is enabled, also sets ``self.ph_lstm_state``,
        ``self.lstm_zero_state`` and ``self.lstm_state``.

        Args:
            input_placeholder: forwarded unchanged to ``layer.Input``.

        Returns:
            The ``node`` attribute of the head.
        """
        conf = da3c_config.config

        conv_desc = dict(type=layer.Convolution,
                         activation=layer.Activation.Elu,
                         n_filters=32,
                         filter_size=[3, 3],
                         stride=[2, 2],
                         border=layer.Border.Same)
        if conf.input.universe:
            # NOTE: list repetition yields four references to one dict copy.
            input_layers = [dict(conv_desc)] * 4
        else:
            input_layers = None
        net_input = layer.Input(conf.input,
                                descs=input_layers,
                                input_placeholder=input_placeholder)

        hidden_sizes = conf.hidden_sizes
        flat = layer.Flatten(net_input)

        # All hidden sizes but the last go into the generic Dense stack.
        dense_descs = [
            {'type': layer.Dense, 'size': width, 'activation': layer.Activation.Relu6}
            for width in hidden_sizes[:-1]
        ]
        fc_layers = layer.GenericLayers(flat, dense_descs)

        # Always read the stack width; it is the head width only when no
        # hidden sizes are configured.
        stack_width = fc_layers.node.shape.as_list()[-1]
        last_size = hidden_sizes[-1] if len(hidden_sizes) > 0 else stack_width

        if not conf.use_lstm:
            head = layer.Dense(fc_layers,
                               last_size,
                               activation=layer.Activation.Relu6)
            tail = head
        else:
            lstm = layer.LSTM(graph.Expand(fc_layers, 0), n_units=last_size)
            head = graph.Reshape(lstm, [-1, last_size])
            # The LSTM itself (not the reshaped head) is handed to Weights.
            tail = lstm

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state

        self.ph_state = net_input.ph_state
        self.weight = layer.Weights(net_input, fc_layers, tail)
        return head.node
    def build_graph(self):
        """Build the actor/critic graph on top of the configured input.

        The flattened input feeds either an LSTM (``use_lstm`` set; width is
        the last hidden size, or the flattened input width when no sizes are
        configured) or a stack of Dense layers, one per entry of
        ``hidden_sizes``.  An ``Actor`` layer and a single-unit Dense critic
        are attached to the resulting head.

        Side effects: sets ``self.ph_state``, ``self.actor``, ``self.critic``,
        ``self.weights`` and ``self.actor_weights``; when ``use_lstm`` is
        enabled, also sets ``self.ph_lstm_state``, ``self.lstm_zero_state``,
        ``self.lstm_state`` and ``self.lstm_reset_timestep``.
        """
        conf = da3c_config.config
        net_input = layer.ConfiguredInput(conf.input)

        hidden_sizes = conf.hidden_sizes
        flat = layer.Flatten(net_input)

        # Always read the flattened width; it is the LSTM width only when no
        # hidden sizes are configured.
        flat_width = flat.node.shape.as_list()[-1]
        last_size = hidden_sizes[-1] if len(hidden_sizes) > 0 else flat_width

        if not conf.use_lstm:
            act = layer.get_activation(conf)
            dense_descs = [{'type': layer.Dense, 'size': width, 'activation': act}
                           for width in hidden_sizes]
            head = layer.GenericLayers(flat, dense_descs)
            body = head
        else:
            lstm = layer.lstm(conf.lstm_type,
                              graph.Expand(flat, 0),
                              n_units=last_size,
                              n_cores=conf.lstm_num_cores)
            head = graph.Reshape(lstm, [-1, last_size])
            # The LSTM itself (not the reshaped head) is handed to Weights.
            body = lstm

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state
            self.lstm_reset_timestep = lstm.reset_timestep

        actor = layer.Actor(head, conf.output)
        critic = layer.Dense(head, 1)

        self.ph_state = net_input.ph_state
        self.actor = actor
        self.critic = graph.Flatten(critic)
        self.weights = layer.Weights(net_input, body, actor, critic)
        self.actor_weights = layer.Weights(actor)
Beispiel #4
0
    def build_graph(self):
        """Build the actor/critic graph on top of an optionally convolutional input.

        When ``input.universe`` is set, the input layer receives four
        convolution descriptions (Elu, 32 filters, 3x3, stride 2, same border);
        otherwise it receives ``None``.  The flattened input feeds either an
        LSTM (``use_lstm`` set; width is the last hidden size, or the flattened
        input width when no sizes are configured) or a stack of Relu Dense
        layers, one per entry of ``hidden_sizes``.  An ``Actor`` layer and a
        single-unit Dense critic are attached to the resulting head.

        Side effects: sets ``self.ph_state``, ``self.actor``, ``self.critic``,
        ``self.weights`` and ``self.actor_weights``; when ``use_lstm`` is
        enabled, also sets ``self.ph_lstm_state``, ``self.lstm_zero_state``
        and ``self.lstm_state``.
        """
        conf = da3c_config.config

        conv_desc = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                         n_filters=32, filter_size=[3, 3], stride=[2, 2],
                         border=layer.Border.Same)
        if conf.input.universe:
            # NOTE: list repetition yields four references to one dict copy.
            input_layers = [dict(conv_desc)] * 4
        else:
            input_layers = None
        net_input = layer.Input(conf.input, descs=input_layers)

        hidden_sizes = conf.hidden_sizes
        flat = layer.Flatten(net_input)

        # Always read the flattened width; it is the LSTM width only when no
        # hidden sizes are configured.
        flat_width = flat.node.shape.as_list()[-1]
        last_size = hidden_sizes[-1] if len(hidden_sizes) > 0 else flat_width

        if not conf.use_lstm:
            dense_descs = [
                {'type': layer.Dense, 'size': width, 'activation': layer.Activation.Relu}
                for width in hidden_sizes
            ]
            head = layer.GenericLayers(flat, dense_descs)
            body = head
        else:
            lstm = layer.LSTM(graph.Expand(flat, 0), n_units=last_size)
            head = graph.Reshape(lstm, [-1, last_size])
            # The LSTM itself (not the reshaped head) is handed to Weights.
            body = lstm

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state

        actor = layer.Actor(head, conf.output)
        critic = layer.Dense(head, 1)

        self.ph_state = net_input.ph_state
        self.actor = actor
        self.critic = graph.Flatten(critic)
        self.weights = layer.Weights(net_input, body, actor, critic)
        self.actor_weights = layer.Weights(actor)
Beispiel #5
0
    def build_graph(self):
        """Build two Dense heads over a paired input and combine their losses.

        The convolutional input is reshaped to rows of ``2 * last_size``
        columns, where ``last_size`` is the width of the flattened input; the
        first and second halves of each row are sliced out separately.

        * Inverse head: two Dense layers over the full reshaped row, producing
          ``action_size`` logits scored with sparse softmax cross entropy
          against ``self.ph_taken``.
        * Forward head: two Dense layers over the first half concatenated with
          ``self.ph_probs``, scored with an L2 loss against the second half.

        Side effects: sets ``self.ph_probs``, ``self.ph_taken``,
        ``self.ph_state``, ``self.rew_out`` (``icm.nu`` times the forward
        loss), ``self.loss`` (``icm.beta``-weighted mix of both losses) and
        ``self.weights``.
        """
        conv_desc = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                         n_filters=32, filter_size=[3, 3], stride=[2, 2],
                         border=layer.Border.Same)
        # NOTE: list repetition yields four references to one dict copy.
        net_input = layer.Input(cfg.config.input, descs=[dict(conv_desc)] * 4)

        n_actions = cfg.config.output.action_size
        probs_shape = [None, n_actions]
        self.ph_probs = graph.Placeholder(np.float32, shape=probs_shape, name='act_probs')
        self.ph_taken = graph.Placeholder(np.int32, shape=(None,), name='act_taken')

        flat = layer.Flatten(net_input)
        last_size = flat.node.shape.as_list()[-1]

        # Each reshaped row holds two chunks of last_size columns.
        paired = graph.Reshape(net_input, [-1, last_size * 2])

        first_half = graph.TfNode(paired.node[:, :last_size])
        second_half = graph.TfNode(paired.node[:, last_size:])

        forward_in = graph.Concat([first_half, self.ph_probs], axis=1)

        fc_size = cfg.config.hidden_sizes[-1]
        inv_hidden = layer.Dense(paired, fc_size, layer.Activation.Relu)
        # No activation here: the cross-entropy op below consumes the logits.
        inv_logits = layer.Dense(inv_hidden, n_actions)

        fwd_hidden = layer.Dense(forward_in, fc_size, layer.Activation.Relu)
        fwd_pred = layer.Dense(fwd_hidden, last_size)

        inv_loss = graph.SparseSoftmaxCrossEntropyWithLogits(inv_logits, self.ph_taken).op
        fwd_loss = graph.L2loss(fwd_pred.node - second_half.node).op

        # Original author's note: batch size should be even for now.
        self.ph_state = net_input.ph_state
        self.rew_out = graph.TfNode(cfg.config.icm.nu * fwd_loss)

        weighted_fwd = cfg.config.icm.beta * fwd_loss
        weighted_inv = (1 - cfg.config.icm.beta) * inv_loss
        self.loss = graph.TfNode(weighted_fwd + weighted_inv)

        self.weights = layer.Weights(net_input, inv_hidden, inv_logits, fwd_hidden, fwd_pred)
Beispiel #6
0
 def build_graph(self, x):
     """Reshape ``x`` to two dimensions and return the resulting node.

     The second dimension is the product of every entry of
     ``x.node.shape`` after the first; the first dimension is ``-1``.

     Args:
         x: object exposing a ``node`` with a ``shape.as_list()`` method.

     Returns:
         The ``node`` attribute of the ``graph.Reshape`` result.
     """
     trailing_dims = x.node.shape.as_list()[1:]
     flat_width = np.prod(trailing_dims)
     reshaped = graph.Reshape(x, (-1, flat_width))
     return reshaped.node