Example #1
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        layers = []
        prev_layer_size = int(np.prod(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in [256, 256]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # Add a batch norm layer.
            layers.append(nn.BatchNorm1d(prev_layer_size))

        self._logits = SlimFC(in_size=prev_layer_size,
                              out_size=self.num_outputs,
                              initializer=torch_normc_initializer(0.01),
                              activation_fn=None)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
        self._hidden_out = None
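
A minimal sketch of the matching forward()/value_function() pair, assuming the RLlib ModelV2 API (the key detail, as in RLlib's batch-norm example model, is switching the BatchNorm1d layers between train and eval mode on every call):

    def forward(self, input_dict, state, seq_lens):
        # Batch norm behaves differently in training vs. inference,
        # so set the mode explicitly from the batch's "is_training" flag.
        self._hidden_layers.train(mode=bool(input_dict.get("is_training", False)))
        self._hidden_out = self._hidden_layers(input_dict["obs"])
        logits = self._logits(self._hidden_out)
        return logits, state

    def value_function(self):
        assert self._hidden_out is not None, "must call forward() first"
        return torch.reshape(self._value_branch(self._hidden_out), [-1])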
Example #2
    def __init__(self, num_states, num_actions, num_nodes=100):
        """
        Initialize a deep Q-learning network for testing algorithm
            in_features: number of features of input.
            num_actions: number of action-value to output, one-to-one correspondence to action in game.
        """
        super(DQNModule, self).__init__()
        layers = []
        prev_layer_size = num_states
        self.num_actions = num_actions
        self.num_nodes = num_nodes

        # Create layers 0 to second-last.
        self.hidden_out_size = 32
        for i, size in enumerate([512, 128, self.hidden_out_size]):
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # if i == 0:
            #     layers.append(nn.Dropout(p=0.3))
            # Add a batch norm layer.
            # layers.append(nn.BatchNorm1d(prev_layer_size))

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=num_actions,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
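
Despite the `_value_branch` name, the head maps the final hidden layer to `num_actions` outputs, i.e. one Q-value per action. A minimal sketch of a matching forward pass (signature assumed, not from the source):

    def forward(self, states):
        # One Q-value per action, from the shared fully-connected trunk.
        return self._value_branch(self._hidden_layers(states))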
Example #3
    def __init__(self, num_states=4, num_actions=18):
        """
        Initialize a deep Q-learning network for testing algorithm
            in_features: number of features of input.
            num_actions: number of action-value to output, one-to-one correspondence to action in game.
        """
        super(DQNModule, self).__init__()
        # NOTE: `dqn_config` is not a parameter here; it is assumed to be
        # available at module level.
        self.device = torch.device(
            f"cuda:{dqn_config['cuda_id']}" if torch.cuda.is_available() else "cpu")
        layers = []
        prev_layer_size = num_states

        self.num_actions = num_actions

        # Create layers 0 to second-last.
        self.hidden_out_size = 64
        for size in [512, 256, 128, self.hidden_out_size]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # Add a batch norm layer.
            # layers.append(nn.BatchNorm1d(prev_layer_size))

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=num_actions,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
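
Note that, unlike Example #5 below, the layers here are never moved to `self.device` inside `__init__`; presumably the caller moves the whole module (e.g. `model.to(model.device)`) before use.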
Example #4
    def __init__(self, device, num_states=4, num_actions=18):
        """
        Initialize a deep Q-learning network for testing algorithm
            in_features: number of features of input.
            num_actions: number of action-value to output, one-to-one correspondence to action in game.
        """
        super(DQNActionModule, self).__init__()
        self.device = device
        state_layers = []
        action_layers = []

        self.num_states = num_states
        self.num_actions = num_actions

        # Create layers 0 to second-last.
        state_prev_layer_size = num_states
        self.state_hidden_out_size = 32
        for size in [256, 128, self.state_hidden_out_size]:
            state_layers.append(
                SlimFC(in_size=state_prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            state_prev_layer_size = size

        action_prev_layer_size = num_actions
        self.action_hidden_out_size = 32
        for size in [64, self.action_hidden_out_size]:
            action_layers.append(
                SlimFC(in_size=action_prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            action_prev_layer_size = size

        layers = []
        prev_layer_size = self.state_hidden_out_size + self.action_hidden_out_size
        self.hidden_out_size = 32
        for size in [64, self.hidden_out_size]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size

        self._value_branch = SlimFC(in_size=self.hidden_out_size,
                                    out_size=1,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._state_hidden_layers = nn.Sequential(*state_layers)
        self._action_hidden_layers = nn.Sequential(*action_layers)
        self._hidden_layers = nn.Sequential(*layers)
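
This is a two-tower Q(s, a) network: state and action are embedded separately, concatenated, and reduced to a single scalar. A minimal sketch of a matching forward pass (signature assumed, not from the source):

    def forward(self, states, actions):
        s = self._state_hidden_layers(states)    # 32-dim state embedding
        a = self._action_hidden_layers(actions)  # 32-dim action embedding
        h = self._hidden_layers(torch.cat([s, a], dim=1))
        return self._value_branch(h)             # one scalar Q(s, a) per row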
Example #5
    def __init__(self, num_states, num_actions, dqn_config):
        """
        Initialize a deep Q-learning network for testing algorithm
            in_features: number of features of input.
            num_actions: number of action-value to output, one-to-one correspondence to action in game.
        """
        super(DQNTransitionModule, self).__init__()
        
        self.num_states = num_states
        self.num_actions = num_actions
        self.device = torch.device(f"cuda:{dqn_config['cuda_id']}" if torch.cuda.is_available() else "cpu")

        self.state_emb_model = None
        layers = []
        prev_layer_size = num_states
        self.state_emb_size = 32
        for i, size in enumerate([128, 64, self.state_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.state_emb_model = nn.Sequential(*layers).to(self.device)

        self.action_emb_model = None
        layers = []
        prev_layer_size = num_actions
        self.action_emb_size = 16
        for i, size in enumerate([64, self.action_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.action_emb_model = nn.Sequential(*layers).to(self.device)


        self.transition_emb_model = None
        layers = []
        prev_layer_size = self.state_emb_size + self.action_emb_size
        self.transition_emb_size = self.state_emb_size
        for i, size in enumerate([128, self.transition_emb_size]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        self.transition_emb_model = nn.Sequential(*layers).to(self.device)
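
Together the three sub-models form a learned transition function in embedding space: `transition_emb_model` maps a (state embedding, action embedding) pair to a predicted next-state embedding with the same size as `state_emb_size`. A minimal sketch of a matching forward pass (signature assumed, not from the source):

    def forward(self, states, actions):
        s_emb = self.state_emb_model(states)
        a_emb = self.action_emb_model(actions)
        # Predicted next-state embedding; comparable against
        # state_emb_model(next_states) in a transition loss.
        return self.transition_emb_model(torch.cat([s_emb, a_emb], dim=1))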
Example #6
    def __init__(self, num_states, num_actions, transition_model, max_num_nodes, min_num_nodes, dqn_config, num_nodes_delta=1):
        """
        Initialize a deep Q-learning network for testing algorithm
            in_features: number of features of input.
            num_actions: number of action-value to output, one-to-one correspondence to action in game.
        """
        super(DQNDPModule, self).__init__()
        
        self.num_actions = num_actions
        self.max_num_nodes = max_num_nodes
        self.min_num_nodes = min_num_nodes
        self.num_nodes_delta = num_nodes_delta
        self.device = torch.device(f"cuda:{dqn_config['cuda_id']}" if torch.cuda.is_available() else "cpu")
        self.transition_model = transition_model

        self.dp_models = {}
        num_nodes = self.min_num_nodes
        while num_nodes <= self.max_num_nodes:
            layers = []
            prev_layer_size = num_states
            for _, size in enumerate([64, 32]):
                layers.append(
                    SlimFC(
                        in_size=prev_layer_size,
                        out_size=size,
                        initializer=torch_normc_initializer(1.0),
                        activation_fn=nn.ReLU))
                prev_layer_size = size
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=num_actions,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=None))
            self.dp_models[num_nodes] = nn.Sequential(*layers).to(self.device)
            num_nodes += self.num_nodes_delta
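
One caveat: `dp_models` is a plain Python dict, so its sub-networks are not registered with the parent `nn.Module` and will not appear in `parameters()` or `state_dict()`; an `nn.ModuleDict` (with string keys) would register them. Usage is then a lookup by problem size (hypothetical call, not from the source):

    # Pick the Q-head that was built for the current number of nodes.
    q_values = self.dp_models[num_nodes](states.to(self.device))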
Example #7
    def __init__(self, num_states, num_actions, num_nodes, device):
        super().__init__()
        self.num_actions = num_actions
        self.num_states = num_states
        self.device = device
        self.num_nodes = num_nodes

        layers = []
        prev_layer_size = num_states
        for _, size in enumerate([256, 128, 64]):
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=num_actions,
                initializer=torch_normc_initializer(1.0),
                activation_fn=None))
        self.model = nn.Sequential(*layers).to(device).share_memory()
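
The trailing `.share_memory()` moves the model's parameters into shared memory, so multiple worker processes can train the same network in parallel (Hogwild!-style): in-place updates made by one process are immediately visible to all others.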
Example #8
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        self.original_space = obs_space.original_space if \
            hasattr(obs_space, "original_space") else obs_space
        assert isinstance(self.original_space, (Dict, Tuple)), \
            "`obs_space.original_space` must be [Dict|Tuple]!"

        self.processed_obs_space = self.original_space if \
            model_config.get("_disable_preprocessor_api") else obs_space

        nn.Module.__init__(self)
        TorchModelV2.__init__(self, self.original_space, action_space,
                              num_outputs, model_config, name)

        self.flattened_input_space = flatten_space(self.original_space)

        # Atari type CNNs or IMPALA type CNNs (with residual layers)?
        # self.cnn_type = self.model_config["custom_model_config"].get(
        #     "conv_type", "atari")

        # Build the CNN(s) given obs_space's image components.
        self.cnns = {}
        self.one_hot = {}
        self.flatten = {}
        concat_size = 0
        for i, component in enumerate(self.flattened_input_space):
            # Image space.
            if len(component.shape) == 3:
                config = {
                    # Size the filters from the per-component shape; the
                    # (possibly flattened) top-level obs_space shape would
                    # not match the image input.
                    "conv_filters": model_config.get(
                        "conv_filters", get_filter_config(component.shape)),
                    "conv_activation": model_config.get("conv_activation"),
                    "post_fcnet_hiddens": [],
                }
                # if self.cnn_type == "atari":
                cnn = ModelCatalog.get_model_v2(component,
                                                action_space,
                                                num_outputs=None,
                                                model_config=config,
                                                framework="torch",
                                                name="cnn_{}".format(i))
                # TODO (sven): add IMPALA-style option.
                # else:
                #    cnn = TorchImpalaVisionNet(
                #        component,
                #        action_space,
                #        num_outputs=None,
                #        model_config=config,
                #        name="cnn_{}".format(i))

                concat_size += cnn.num_outputs
                self.cnns[i] = cnn
                self.add_module("cnn_{}".format(i), cnn)
            # Discrete|MultiDiscrete inputs -> One-hot encode.
            elif isinstance(component, Discrete):
                self.one_hot[i] = True
                concat_size += component.n
            elif isinstance(component, MultiDiscrete):
                self.one_hot[i] = True
                concat_size += sum(component.nvec)
            # Everything else (1D Box).
            else:
                self.flatten[i] = int(np.prod(component.shape))
                concat_size += self.flatten[i]

        # Optional post-concat FC-stack.
        post_fc_stack_config = {
            "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
            "fcnet_activation": model_config.get("post_fcnet_activation",
                                                 "relu")
        }
        self.post_fc_stack = ModelCatalog.get_model_v2(
            Box(float("-inf"),
                float("inf"),
                shape=(concat_size, ),
                dtype=np.float32),
            self.action_space,
            None,
            post_fc_stack_config,
            framework="torch",
            name="post_fc_stack")

        # Actions and value heads.
        self.logits_layer = None
        self.value_layer = None
        self._value_out = None

        if num_outputs:
            # Action-distribution head.
            self.logits_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=num_outputs,
                activation_fn=None,
            )
            # Create the value branch model.
            self.value_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=1,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01))
        else:
            self.num_outputs = concat_size
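
A condensed sketch of how the matching forward pass would route each component, following the structures built above (variable names, the `one_hot_encode` helper, and the exact ModelV2 call convention are illustrative, not from the source):

    outs = []
    for i, component in enumerate(flattened_obs):
        if i in self.cnns:
            cnn_out, _ = self.cnns[i]({"obs": component})  # image -> CNN features
            outs.append(cnn_out)
        elif i in self.one_hot:
            outs.append(one_hot_encode(component))  # Discrete/MultiDiscrete -> one-hot
        else:
            outs.append(component.reshape(-1, self.flatten[i]))  # 1D Box -> flatten
    out, _ = self.post_fc_stack({"obs": torch.cat(outs, dim=1)}, [], None)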
Example #9
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        self.original_space = obs_space.original_space if \
            hasattr(obs_space, "original_space") else obs_space
        assert isinstance(self.original_space, Tuple), \
            "`obs_space.original_space` must be Tuple!"

        nn.Module.__init__(self)
        TorchModelV2.__init__(self, self.original_space, action_space,
                              num_outputs, model_config, name)
        self.new_obs_space = obs_space
        # Atari type CNNs or IMPALA type CNNs (with residual layers)?
        # self.cnn_type = self.model_config["custom_model_config"].get(
        #     "conv_type", "atari")

        # Build the CNN(s) given obs_space's image components.
        self.cnns = {}
        self.one_hot = {}
        self.flatten = {}
        concat_size_p, concat_size_v = 0, 0
        for i, component in enumerate(self.original_space[:-1]):
            # Image space.
            if len(component.shape) == 3:
                config = {
                    # As in Example #8, size the filters from the component
                    # shape, not the flattened top-level obs_space shape.
                    "conv_filters": model_config.get(
                        "conv_filters", get_filter_config(component.shape)),
                    "conv_activation": model_config.get("conv_activation"),
                    "post_fcnet_hiddens": [],
                }
                # if self.cnn_type == "atari":
                cnn = TorchBatchNormModel(component, action_space, None,
                                          config, 'cnn_{}'.format(i))
                print(cnn)
                concat_size_p += cnn.num_outputs_p
                concat_size_v += cnn.num_outputs_v
                self.cnns[i] = cnn
                self.add_module("cnn_{}".format(i), cnn)
            # Discrete inputs -> One-hot encode.
            elif isinstance(component, Discrete):
                self.one_hot[i] = True
                concat_size_p += component.n
                concat_size_v += component.n
            # Everything else (1D Box).
            else:
                self.flatten[i] = int(np.prod(component.shape))
                concat_size_p += self.flatten[i]
                concat_size_v += self.flatten[i]

        # Assumes `post_fcnet_hiddens` is non-empty in the model config.
        hidden_size = model_config.get("post_fcnet_hiddens", [])
        self.post_fc_stack = nn.Sequential(
            SlimFC(concat_size_p,
                   hidden_size[0],
                   initializer=torch_normc_initializer(1.0),
                   activation_fn=None), nn.BatchNorm1d(hidden_size[0]),
            nn.ReLU())
        self.post_fc_stack_vf = nn.Sequential(
            SlimFC(concat_size_v,
                   hidden_size[0],
                   initializer=torch_normc_initializer(1.0),
                   activation_fn=None), nn.BatchNorm1d(hidden_size[0]),
            nn.ReLU())

        # Actions and value heads.
        self.logits_layer = None
        self.value_layer = None
        self._value_out = None

        if num_outputs:
            # Action-distribution head.
            self.logits_layer = SlimFC(
                in_size=hidden_size[0],
                out_size=num_outputs,
                initializer=torch_normc_initializer(0.01),
                activation_fn=None,
            )
            # Create the value branch model.
            self.value_layer = SlimFC(
                in_size=hidden_size[0],
                out_size=1,
                initializer=torch_normc_initializer(1.0),
                activation_fn='tanh',
            )
        else:
            raise NotImplementedError()
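
Unlike Example #8, this model keeps fully separate post-FC stacks for the policy path (`concat_size_p`) and the value path (`concat_size_v`), and its value head uses a `tanh` activation, which bounds value predictions to [-1, 1].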
Example #10
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # TODO: (sven) Support Dicts as well.
        assert isinstance(obs_space.original_space, Tuple), \
            "`obs_space.original_space` must be Tuple!"

        nn.Module.__init__(self)
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)

        # Atari type CNNs or IMPALA type CNNs (with residual layers)?
        self.cnn_type = self.model_config["custom_model_config"].get(
            "conv_type", "atari")

        # Build the CNN(s) given obs_space's image components.
        self.cnns = {}
        concat_size = 0
        for i, component in enumerate(obs_space.original_space):
            # Image space.
            if len(component.shape) == 3:
                config = {
                    "conv_filters":
                    model_config.get("conv_filters",
                                     get_filter_config(component.shape)),
                    "conv_activation":
                    model_config.get("conv_activation"),
                }
                if self.cnn_type == "atari":
                    cnn = ModelCatalog.get_model_v2(component,
                                                    action_space,
                                                    num_outputs=None,
                                                    model_config=config,
                                                    framework="torch",
                                                    name="cnn_{}".format(i))
                else:
                    cnn = TorchImpalaVisionNet(component,
                                               action_space,
                                               num_outputs=None,
                                               model_config=config,
                                               name="cnn_{}".format(i))

                concat_size += cnn.num_outputs
                self.cnns[i] = cnn
                self.add_module("cnn_{}".format(i), cnn)
            # Discrete inputs -> One-hot encode.
            elif isinstance(component, Discrete):
                concat_size += component.n
            # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
            # Everything else (1D Box).
            else:
                assert len(component.shape) == 1, \
                    "Only input Box 1D or 3D spaces allowed!"
                concat_size += component.shape[-1]

        self.logits_layer = None
        self.value_layer = None
        self._value_out = None

        if num_outputs:
            # Action-distribution head.
            self.logits_layer = SlimFC(
                in_size=concat_size,
                out_size=num_outputs,
                activation_fn=None,
            )
            # Create the value branch model.
            self.value_layer = SlimFC(
                in_size=concat_size,
                out_size=1,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01))
        else:
            self.num_outputs = concat_size
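
When `num_outputs` is falsy, no heads are built and `self.num_outputs` is set to the concatenated feature size instead; in RLlib's ModelV2 convention this lets the network serve as a pure feature extractor inside a wrapper (e.g. an auto-LSTM wrapper) that attaches its own logits and value heads.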