コード例 #1
0
ファイル: agent.py プロジェクト: oidelima/ppo
    def __init__(self, num_inputs, hidden_size, num_layers, recurrent, activation):
        """Build twin actor/critic MLP stacks and a scalar value head.

        Args:
            num_inputs: Width of the input features; also forwarded to the
                base-class constructor as its second argument.
            hidden_size: Width of every hidden layer.
            num_layers: Number of ``Linear`` + activation pairs per stack.
                Must be positive.
            recurrent: Forwarded to the base class. When truthy, the first
                layer of each stack takes ``hidden_size`` inputs instead of
                ``num_inputs``.
            activation: Module placed after every linear layer. The same
                instance is reused in every layer of both stacks.
        """
        assert num_layers > 0
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        def init_(module):
            # Delegates to the project's `init` helper with `init_normc_`
            # and a zero-constant initializer.
            return init(module, init_normc_, lambda x: nn.init.constant_(x, 0))

        in_features = hidden_size if recurrent else num_inputs

        self.actor = nn.Sequential()
        self.critic = nn.Sequential()
        for index in range(num_layers):
            layer_name = f"fc{index}"
            # At each depth the actor layer is created before the critic layer.
            for tower in (self.actor, self.critic):
                linear = init_(nn.Linear(in_features, hidden_size))
                tower.add_module(layer_name, nn.Sequential(linear, activation))
            in_features = hidden_size

        self.critic_linear = init_(nn.Linear(in_features, 1))

        self.train()
コード例 #2
0
ファイル: agent.py プロジェクト: oidelima/ppo
    def __init__(self, d, h, w, activation, hidden_size, num_layers, recurrent=False):
        """Build a stack of 1x1 convolutions followed by a linear projection.

        Args:
            d: Number of input channels of the first convolution.
            h: Spatial height used to size the flattened feature vector.
            w: Spatial width used to size the flattened feature vector.
            activation: Module inserted after every layer; the same instance
                is reused throughout.
            hidden_size: Channel count of every convolution and width of the
                final projection.
            num_layers: Number of additional conv + activation blocks after
                the first convolution.
            recurrent: Forwarded to the base-class constructor.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        layers = [init_(nn.Conv2d(d, hidden_size, kernel_size=1)), activation]

        for _ in range(num_layers):
            # NOTE(review): `init_` receives `activation` as a second
            # argument here only -- confirm the helper accepts it.
            conv = init_(
                nn.Conv2d(hidden_size, hidden_size, kernel_size=1), activation
            )
            layers.append(nn.Sequential(conv, activation))

        # A further activation follows the conv stack, directly after the
        # activation that ends the preceding layer.
        layers.append(activation)
        layers.append(Flatten())
        # The projection expects hidden_size * h * w flattened features.
        layers.append(init_(nn.Linear(hidden_size * h * w, hidden_size)))
        layers.append(activation)

        self.main = nn.Sequential(*layers)

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
コード例 #3
0
ファイル: gates.py プロジェクト: oidelima/ppo
 def generate_modules(k):
     """Yield the conv/pool modules in order, then a Flatten (and optional Linear).

     ``k`` is the running spatial size. It is threaded through the two
     sub-generators: each yields ``(new_k, module)`` and receives the current
     ``k`` back through ``send``. The size is treated as square (``k**2``).

     Pooling modules start being interleaved once the loop index reaches
     ``n_conv - n_pools`` and only when ``pool_type`` is not ``None``.
     Generation of conv/pool modules stops as soon as either sub-generator
     is exhausted.

     Raises:
         ValueError: if no convolution could be generated, since the output
             size of the Flatten depends on the last convolution.
     """
     n_pools = min(len(pool_strides), len(pool_kernels))
     n_conv = min(len(conv_layers), len(conv_strides),
                  len(conv_kernels))
     conv_iterator = generate_convolutions(k)
     # Sentinel so an empty convolution spec is detected explicitly instead
     # of surfacing later as an UnboundLocalError.
     conv = None
     try:
         k, conv = next(conv_iterator)
         yield conv
         pool_iterator = None
         for i in itertools.count():
             if pool_iterator is None:
                 if i >= n_conv - n_pools and pool_type is not None:
                     pool_iterator = generate_pools(k)
                     k, pool = next(pool_iterator)
                     yield pool
             else:
                 k, pool = pool_iterator.send(k)
                 yield pool
             k, conv = conv_iterator.send(k)
             yield conv
     except StopIteration:
         # Either sub-generator ran out; `k` and `conv` keep their last values.
         pass
     if conv is None:
         raise ValueError(
             "generate_modules requires at least one convolution layer")
     out_size = k**2 * conv.out_channels
     yield Flatten(out_size=out_size)
     if not concat:
         yield init_(nn.Linear(out_size, hidden_size))
コード例 #4
0
    def __init__(
        self,
        observation_space,
        action_space,
        activation,
        hidden_size,
        num_layers,
        num_edges,
        num_encoding_layers,
        debug,
        no_scan,
        no_roll,
    ):
        """Create the embeddings, recurrent cells, MLPs and output heads.

        Args:
            observation_space: Object whose ``spaces`` mapping is unpacked
                into ``Obs``.
            action_space: Object whose ``nvec`` holds exactly two entries;
                the first is used as the number of actions.
            activation: Module inserted after each hidden linear layer.
            hidden_size: Width of the embeddings and hidden layers.
            num_layers: Number of Linear + activation pairs in ``self.mlp``.
            num_edges: Output width of ``self.option`` and ``self.mlp2``.
            num_encoding_layers: ``self.mlp2`` has this many linear layers,
                the last one without an activation.
            debug: Stored on the instance.
            no_scan: Stored on the instance.
            no_roll: Stored on the instance.
        """
        super().__init__()

        # Plain configuration attributes.
        self.no_roll = no_roll
        self.no_scan = no_scan
        self.obs_spaces = Obs(**observation_space.spaces)
        section_sizes = [int(np.prod(space.shape)) for space in self.obs_spaces]
        self.obs_sections = Obs(*section_sizes)
        self.action_size = 2
        self.debug = debug
        self.hidden_size = hidden_size
        self.ne = num_edges

        num_tasks = int(self.obs_spaces.lines.nvec[0])
        # The unpacking also enforces that `nvec` has exactly two entries.
        n_a, _n_p = map(int, action_space.nvec)
        self.n_a = n_a

        # Embeddings and recurrent modules.
        self.embed_task = nn.Embedding(num_tasks, hidden_size)
        self.embed_action = nn.Embedding(n_a, hidden_size)
        self.task_encoder = nn.GRU(
            hidden_size, hidden_size, bidirectional=True, batch_first=True
        )
        gru_input_size = self.obs_sections.condition + hidden_size * 2
        self.gru = nn.GRUCell(gru_input_size, hidden_size)

        def hidden_pair():
            """Return a fresh hidden Linear layer followed by the activation."""
            return [init_(nn.Linear(hidden_size, hidden_size)), activation]

        mlp_layers = []
        for _ in range(num_layers):
            mlp_layers += hidden_pair()
        self.mlp = nn.Sequential(*mlp_layers)
        self.option = init_(nn.Linear(hidden_size, num_edges))

        encoder_layers = []
        for _ in range(num_encoding_layers - 1):
            encoder_layers += hidden_pair()
        encoder_layers.append(init_(nn.Linear(hidden_size, num_edges)))
        self.mlp2 = nn.Sequential(*encoder_layers)

        # Output heads.
        self.stuff = init_(nn.Linear(hidden_size, 1))
        self.critic = init_(nn.Linear(hidden_size, 1))
        self.actor = Categorical(hidden_size, n_a)
        self.attention = Categorical(hidden_size, n_a)
        self.state_sizes = RecurrentState(a=1, a_probs=n_a, v=1, h=hidden_size)

        # Buffer that is zero everywhere except index 0 of the third axis.
        first = torch.zeros(1, 1, 2 * self.obs_sections.lines, 1)
        first[0, 0, 0] = 1
        self.register_buffer("first", first)
コード例 #5
0
    def __init__(
        self,
        observation_space,
        action_space,
        hidden_size,
        task_embed_size,
        conv_hidden_size,
        num_layers,
        entropy_coef,
        stride,
        kernel_size,
        lower_embed_size,
        **network_args,
    ):
        """Set up embeddings, a convolution, a critic head and a recurrent MLP.

        Args:
            observation_space: Object whose ``spaces`` mapping is unpacked
                into ``Obs`` and whose ``obs.shape`` gives ``(d, h, w)``.
            action_space: Object exposing ``nvec``; its size becomes
                ``self.action_size`` and its first entry the action count.
            hidden_size: Width of the embeddings, critic input and MLP.
            task_embed_size: Stored on the instance.
            conv_hidden_size: Number of output channels of ``self.conv``.
            num_layers: The recurrent MLP is built with ``num_layers + 1``
                layers.
            entropy_coef: Stored on the instance.
            stride: Stride of ``self.conv``.
            kernel_size: Kernel size of ``self.conv``.
            lower_embed_size: Embedding width of ``self.embed_lower``.
            **network_args: Extra keyword arguments forwarded to ``MLPBase``;
                ``recurrent`` and ``activation`` are overwritten here.
        """
        # Assigned before the nn.Module constructor runs.
        self.obs_spaces = Obs(**observation_space.spaces)
        nn.Module.__init__(self)
        abstract_recurrence.Recurrence.__init__(self)
        self.action_size = action_space.nvec.size
        self.entropy_coef = entropy_coef
        self.hidden_size = hidden_size
        self.task_embed_size = task_embed_size
        self.obs_sections = Obs(*get_obs_sections(self.obs_spaces))
        self.train_lines = len(self.obs_spaces.lines.nvec)

        # networks
        n_a = int(action_space.nvec[0])
        # NOTE(review): `build_embed_task` is not defined in this block;
        # presumably inherited -- confirm.
        self.embed_task = self.build_embed_task(hidden_size)
        self.embed_action = nn.Embedding(n_a, hidden_size)
        self.critic = init_(nn.Linear(hidden_size, 1))
        # NOTE(review): reads `.obs` from the raw `observation_space` argument
        # rather than from `self.obs_spaces` -- confirm the attribute exists.
        # `h` and `w` are not used afterwards in this constructor.
        d, h, w = observation_space.obs.shape
        padding = optimal_padding(kernel_size, stride)
        self.conv = nn.Conv2d(
            in_channels=d,
            out_channels=conv_hidden_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        # NOTE(review): `self.action_space_nvec` is not assigned anywhere in
        # this constructor; presumably provided elsewhere -- confirm.
        self.embed_lower = nn.Embedding(self.action_space_nvec.lower + 1,
                                        lower_embed_size)

        self.dist = Categorical(hidden_size, n_a)
        # Force a recurrent MLP with ReLU regardless of what the caller passed.
        network_args.update(recurrent=True, activation=nn.ReLU())
        self.recurrent_module = MLPBase(
            num_inputs=conv_hidden_size + self.train_lines * hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers + 1,
            **network_args,
        )
コード例 #6
0
ファイル: gates.py プロジェクト: oidelima/ppo
 def generate_convolutions(k):
     """Yield ``(output_size, conv)`` for each configured convolution.

     ``k`` is the current spatial size. After every yield the caller is
     expected to ``send`` the size that the next convolution should assume;
     that value replaces ``k``. Each kernel is clamped to ``k``, and the
     padding is derived from the clamped kernel and the stride.
     """
     channels_in = d
     specs = zip(conv_layers, conv_kernels, conv_strides)
     for channels_out, kernel, stride in specs:
         kernel = min(k, kernel)
         padding = (kernel // 2) % stride
         module = nn.Conv2d(
             in_channels=channels_in,
             out_channels=channels_out,
             kernel_size=kernel,
             stride=stride,
             padding=padding,
         )
         conv = init_(module)
         # Conv2d output-size formula for dilation 1.
         size_after = int((k + (2 * padding) - (kernel - 1) - 1) // stride + 1)
         k = yield size_after, conv
         channels_in = channels_out
コード例 #7
0
ファイル: gates.py プロジェクト: oidelima/ppo
    def __init__(
        self,
        d,
        h,
        w,
        line_nvec,
        conv_layers,
        conv_kernels,
        conv_strides,
        pool_type,
        pool_kernels,
        pool_strides,
        action_size,
        line_hidden_size,
        lower_hidden_size,
        concat,
    ):
        """Build a conv/pool encoder, line and lower embeddings, and a scalar head.

        Args:
            d: Number of input channels of the first convolution.
            h: Initial spatial size fed to the size bookkeeping (the feature
                map is treated as square).
            w: Accepted but not used in this constructor.
            line_nvec: Tensor of per-position vocabulary sizes; its padded
                cumulative sum is stored as the ``offset`` buffer and its sum
                sizes ``self.embed_line``.
            conv_layers: Output channel counts; ``None`` entries are dropped.
            conv_kernels: Kernel sizes; ``None`` entries are dropped.
            conv_strides: Strides; ``None`` entries are dropped.
            pool_type: ``"avg"``, ``"max"`` or ``None`` (no pooling).
            pool_kernels: Pool kernel sizes; ``None`` entries are dropped.
            pool_strides: Pool strides; ``None`` entries are dropped.
            action_size: Number of rows of ``self.embed_lower``.
            line_hidden_size: Line embedding width (used when ``concat``).
            lower_hidden_size: Lower embedding width; when ``concat`` is
                false it is used as the common hidden size for everything.
            concat: When true, the output head consumes the concatenated
                widths; otherwise a Linear projection is appended to the
                conv stack and all widths equal ``lower_hidden_size``.

        Raises:
            ValueError: if no convolution layer remains after dropping
                ``None`` entries.
            RuntimeError: if pooling is requested with an unknown
                ``pool_type``.
        """
        super().__init__()
        self.concat = concat
        if not concat:
            # Only defined (and only read) on the non-concat path.
            hidden_size = lower_hidden_size

        def remove_none(xs):
            """Return ``xs`` without its ``None`` entries."""
            return [x for x in xs if x is not None]

        conv_layers = remove_none(conv_layers)
        conv_kernels = remove_none(conv_kernels)
        conv_strides = remove_none(conv_strides)
        pool_kernels = remove_none(pool_kernels)
        pool_strides = remove_none(pool_strides)

        def generate_pools(k):
            """Yield ``(output_size, pool)`` pairs; receives ``k`` via ``send``."""
            for (kernel, stride) in zip(pool_kernels, pool_strides):
                kernel = min(k, kernel)
                padding = (kernel // 2) % stride
                if pool_type == "avg":
                    pool = nn.AvgPool2d(kernel_size=kernel,
                                        stride=stride,
                                        padding=padding)
                elif pool_type == "max":
                    pool = nn.MaxPool2d(kernel_size=kernel,
                                        stride=stride,
                                        padding=padding)
                else:
                    raise RuntimeError(f"unknown pool_type: {pool_type!r}")
                k = int((k + 2 * padding - kernel) / stride + 1)
                k = yield k, pool

        def generate_convolutions(k):
            """Yield ``(output_size, conv)`` pairs; receives ``k`` via ``send``."""
            in_size = d
            for (layer, kernel, stride) in zip(conv_layers, conv_kernels,
                                               conv_strides):
                kernel = min(k, kernel)
                padding = (kernel // 2) % stride
                conv = init_(
                    nn.Conv2d(
                        in_channels=in_size,
                        out_channels=layer,
                        kernel_size=kernel,
                        stride=stride,
                        padding=padding,
                    ))
                k = int((k + (2 * padding) - (kernel - 1) - 1) // stride + 1)
                k = yield k, conv
                in_size = layer

        def generate_modules(k):
            """Yield conv/pool modules in order, then Flatten (and optional Linear).

            Pooling starts once the loop index reaches ``n_conv - n_pools``
            and only when ``pool_type`` is not ``None``. Generation stops as
            soon as either sub-generator is exhausted.
            """
            n_pools = min(len(pool_strides), len(pool_kernels))
            n_conv = min(len(conv_layers), len(conv_strides),
                         len(conv_kernels))
            conv_iterator = generate_convolutions(k)
            # Sentinel so an empty convolution spec is reported explicitly
            # instead of surfacing as an UnboundLocalError below.
            conv = None
            try:
                k, conv = next(conv_iterator)
                yield conv
                pool_iterator = None
                for i in itertools.count():
                    if pool_iterator is None:
                        if i >= n_conv - n_pools and pool_type is not None:
                            pool_iterator = generate_pools(k)
                            k, pool = next(pool_iterator)
                            yield pool
                    else:
                        k, pool = pool_iterator.send(k)
                        yield pool
                    k, conv = conv_iterator.send(k)
                    yield conv
            except StopIteration:
                # A sub-generator ran out; `k` and `conv` keep their last values.
                pass
            if conv is None:
                raise ValueError(
                    "at least one convolution layer is required")
            out_size = k**2 * conv.out_channels
            yield Flatten(out_size=out_size)
            if not concat:
                yield init_(nn.Linear(out_size, hidden_size))

        # The last generated module is the Flatten when `concat` is true and
        # the trailing Linear otherwise; `flatten.out_size` is only read on
        # the `concat` path.
        *obs_modules, flatten = generate_modules(h)
        self.conv = nn.Sequential(*obs_modules, flatten)
        if not concat:
            line_hidden_size = hidden_size
            lower_hidden_size = hidden_size
        offset = F.pad(line_nvec.cumsum(0), [1, 0])
        self.register_buffer("offset", offset)
        self.embed_line = nn.EmbeddingBag(line_nvec.sum(), line_hidden_size)
        self.embed_lower = nn.Embedding(action_size, lower_hidden_size)
        self.out = init_(
            nn.Linear(
                (flatten.out_size + line_hidden_size + lower_hidden_size)
                if concat else hidden_size,
                1,
            ))
コード例 #8
0
 def __init__(
     self,
     hidden2,
     hidden_size,
     conv_hidden_size,
     fuzz,
     critic_type,
     gate_hidden_size,
     gate_conv_kernel_size,
     gate_coef,
     gate_stride,
     observation_space,
     lower_level_load_path,
     lower_embed_size,
     kernel_size,
     stride,
     action_space,
     lower_level_config,
     task_embed_size,
     num_edges,
     **kwargs,
 ):
     """Extend the base recurrence with a gate, a conv encoder and a lower-level agent.

     Args:
         hidden2: Output width of ``self.linear2``; part of the gate input.
         hidden_size: Forwarded to the parent constructor; width of ``zeta``.
         conv_hidden_size: Output channels of ``self.conv``.
         fuzz: Stored on the instance.
         critic_type: One of ``"z"``, ``"h1"``, ``"z3"``, ``"combined"``,
             ``"multi-layer"``; selects the critic head built here.
         gate_hidden_size: Forwarded to the parent; also sizes the gate
             convolution bias and the inventory embedding.
         gate_conv_kernel_size: Kernel size of the gate convolution.
         gate_coef: Stored on the instance.
         gate_stride: Stride of the gate convolution.
         observation_space: Object whose ``spaces`` mapping is unpacked
             into ``Obs``.
         lower_level_load_path: Optional checkpoint path for the lower-level
             agent; skipped when ``None``.
         lower_embed_size: Embedding width of ``self.embed_lower``.
         kernel_size: Requested kernel size of ``self.conv``.
         stride: Stride of ``self.conv``.
         action_space: Object exposing ``nvec``.
         lower_level_config: Path-like object with ``open()``; its JSON
             content provides the lower-level agent's parameters.
         task_embed_size: Forwarded to the parent constructor.
         num_edges: Forwarded to the parent constructor.
         **kwargs: Forwarded to the parent constructor.
     """
     # These attributes are assigned before the parent constructor runs.
     self.critic_type = critic_type
     self.fuzz = fuzz
     self.gate_coef = gate_coef
     self.conv_hidden_size = conv_hidden_size
     self.kernel_size = kernel_size
     self.stride = stride
     self.gate_hidden_size = gate_hidden_size
     self.gate_kernel_size = gate_conv_kernel_size
     self.gate_stride = gate_stride
     observation_space = Obs(**observation_space.spaces)
     recurrence.Recurrence.__init__(
         self,
         hidden_size=hidden_size,
         gate_hidden_size=gate_hidden_size,
         task_embed_size=task_embed_size,
         observation_space=observation_space,
         action_space=action_space,
         num_edges=num_edges,
         **kwargs,
     )
     # Second assignment of the same value (also set above).
     self.conv_hidden_size = conv_hidden_size
     abstract_recurrence.Recurrence.__init__(self)
     d, h, w = observation_space.obs.shape
     # NOTE(review): the kernel is clamped by `d`, the first entry of the
     # observation shape, not by `h` -- confirm this is intended.
     self.kernel_size = min(d, kernel_size)
     padding = optimal_padding(h, kernel_size, stride) + 1
     self.conv = nn.Conv2d(
         in_channels=d,
         out_channels=conv_hidden_size,
         kernel_size=self.kernel_size,
         stride=stride,
         padding=padding,
     )
     # NOTE(review): `self.action_space_nvec`, `self.obs_spaces`,
     # `self.no_pointer`, `self.task_embed_size` and `self.state_sizes` are
     # not assigned in this block; presumably set by the parent constructor.
     self.embed_lower = nn.Embedding(self.action_space_nvec.lower + 1,
                                     lower_embed_size)
     inventory_size = self.obs_spaces.inventory.n
     inventory_hidden_size = gate_hidden_size
     self.embed_inventory = nn.Sequential(
         init_(nn.Linear(inventory_size, inventory_hidden_size)), nn.ReLU())
     # Parsed as (2 * task_embed_size + hidden_size) if no_pointer
     # else task_embed_size.
     m_size = (2 * self.task_embed_size +
               hidden_size if self.no_pointer else self.task_embed_size)
     self.zeta = init_(
         nn.Linear(conv_hidden_size + m_size + inventory_hidden_size,
                   hidden_size))
     # NOTE(review): uses the raw `kernel_size` argument here, whereas
     # `self.conv` was built with the clamped `self.kernel_size`.
     output_dim = conv_output_dimension(h=h,
                                        padding=padding,
                                        kernel=kernel_size,
                                        stride=stride)
     self.gate_padding = optimal_padding(h, gate_conv_kernel_size,
                                         gate_stride)
     output_dim2 = conv_output_dimension(
         h=output_dim,
         padding=self.gate_padding,
         kernel=self.gate_kernel_size,
         stride=self.gate_stride,
     )
     z2_size = m_size + hidden2 + gate_hidden_size * output_dim2**2
     self.d_gate = Categorical(z2_size, 2)
     self.linear1 = nn.Linear(
         m_size,
         conv_hidden_size * gate_conv_kernel_size**2 * gate_hidden_size)
     self.conv_bias = nn.Parameter(torch.zeros(gate_hidden_size))
     self.linear2 = nn.Linear(m_size + lower_embed_size, hidden2)
     # There is no `else` branch: for any other `critic_type` this block
     # leaves `self.critic` untouched.
     if self.critic_type == "z":
         self.critic = init_(nn.Linear(hidden_size, 1))
     elif self.critic_type == "h1":
         self.critic = init_(nn.Linear(gate_hidden_size * output_dim2**2,
                                       1))
     elif self.critic_type == "z3":
         self.critic = init_(nn.Linear(gate_hidden_size, 1))
     elif self.critic_type == "combined":
         self.critic = init_(nn.Linear(hidden_size + z2_size, 1))
     elif self.critic_type == "multi-layer":
         self.critic = nn.Sequential(
             init_(nn.Linear(hidden_size + z2_size, hidden_size)),
             nn.ReLU(),
             init_(nn.Linear(hidden_size, 1)),
         )
     # Extend the existing state sizes with the gate / lower-level fields.
     state_sizes = self.state_sizes._asdict()
     with lower_level_config.open() as f:
         lower_level_params = json.load(f)
     ll_action_space = spaces.Discrete(Action(*action_space.nvec).lower)
     self.state_sizes = RecurrentState(
         **state_sizes,
         dg_probs=2,
         dg=1,
         l=1,
         l_probs=ll_action_space.n,
         lh=lower_level_params["hidden_size"],
     )
     self.lower_level = Agent(
         obs_spaces=observation_space,
         entropy_coef=0,
         action_space=ll_action_space,
         lower_level=True,
         num_layers=1,
         **lower_level_params,
     )
     if lower_level_load_path is not None:
         # Checkpoint is loaded onto the CPU; only its "agent" entry is used.
         state_dict = torch.load(lower_level_load_path, map_location="cpu")
         self.lower_level.load_state_dict(state_dict["agent"])
         print(f"Loaded lower_level from {lower_level_load_path}.")
コード例 #9
0
ファイル: agent.py プロジェクト: oidelima/ppo
    def __init__(
        self,
        hidden_size,
        num_layers,
        recurrent,
        obs_space,
        num_conv_layers,
        kernel_size,
        stride,
        activation=nn.ReLU(),
        **_,
    ):
        """Build the conv encoder, embeddings, MLP trunk and value head.

        Args:
            hidden_size: Channel count of the convolutions and width of all
                linear layers and embeddings.
            num_layers: Number of Linear + activation pairs in ``self.mlp``;
                must be positive.
            recurrent: Forwarded to the base-class constructor.
            obs_space: Either a ``spaces.Dict`` (converted to ``Obs``) or an
                object already exposing ``obs``, ``inventory`` and ``lines``.
            num_conv_layers: Number of conv + activation blocks; must be
                positive.
            kernel_size: Initial kernel size; clamped to the running spatial
                size after each convolution.
            stride: Stride of every convolution.
            activation: Module placed after each layer; the same instance is
                reused everywhere.
            **_: Extra keyword arguments are accepted and ignored.
        """
        if type(obs_space) is spaces.Dict:
            obs_space = Obs(**obs_space.spaces)
        assert num_layers > 0
        super().__init__(
            recurrent=recurrent,
            recurrent_input_size=hidden_size,
            hidden_size=hidden_size,
        )

        # One preprocessed row per subtask, plus a trailing all-zero row.
        subtask_rows = [Env.preprocess_line(Subtask(s)) for s in subtasks()]
        subtask_rows = subtask_rows + [[0, 0, 0, 0]]
        self.register_buffer("subtasks", torch.tensor(subtask_rows))

        (d, h, w) = obs_space.obs.shape
        inventory_size = obs_space.inventory.n
        line_nvec = torch.tensor(obs_space.lines.nvec)
        # Cumulative sum of the first row's sizes (last one dropped), with a
        # leading zero.
        self.register_buffer("offset", F.pad(line_nvec[0, :-1].cumsum(0), [1, 0]))
        self.obs_spaces = obs_space
        self.obs_sections = get_obs_sections(self.obs_spaces)

        # Padding is computed once from the initial kernel size.
        padding = (kernel_size // 2) % stride

        self.conv = nn.Sequential()
        channels = d
        assert num_conv_layers > 0
        for index in range(num_conv_layers):
            conv = nn.Conv2d(
                channels,
                hidden_size,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
            )
            self.conv.add_module(
                f"conv{index}", nn.Sequential(init_(conv), activation)
            )
            channels = hidden_size
            # The width is taken to equal the height from here on.
            h = w = (h + (2 * padding) - (kernel_size - 1) - 1) // stride + 1
            kernel_size = min(h, kernel_size)
        self.conv.add_module("flatten", Flatten())

        def init2(module):
            # Delegates to the project's `init` helper with `init_normc_`
            # and a zero-constant initializer.
            return init(module, init_normc_, lambda x: nn.init.constant_(x, 0))

        self.conv_projection = nn.Sequential(
            init2(nn.Linear(h * w * hidden_size, hidden_size)), activation
        )
        self.line_embed = nn.EmbeddingBag(line_nvec[0].sum(), hidden_size)
        self.inventory_embed = nn.Sequential(
            init2(nn.Linear(inventory_size, hidden_size)), activation
        )

        self.mlp = nn.Sequential()
        width = hidden_size
        for index in range(num_layers):
            fc = init2(nn.Linear(width, hidden_size))
            self.mlp.add_module(f"fc{index}", nn.Sequential(fc, activation))
            width = hidden_size

        self.critic_linear = init2(nn.Linear(width, 1))
        self._output_size = width
        self.train()
コード例 #10
0
ファイル: recurrence.py プロジェクト: oidelima/ppo
    def __init__(
        self,
        observation_space,
        action_space,
        eval_lines,
        activation,
        hidden_size,
        gate_hidden_size,
        task_embed_size,
        num_layers,
        num_edges,
        num_encoding_layers,
        debug,
        no_scan,
        no_roll,
        no_pointer,
        transformer,
        olsk,
        log_dir,
    ):
        """Store configuration and build the task encoder, pointer and heads.

        Args:
            observation_space: Stored as ``self.obs_spaces``; must expose
                ``lines.nvec``.
            action_space: Object exposing ``nvec``; unpacked into ``Action``.
            eval_lines: Stored on the instance.
            activation: Module inserted after the hidden layers of ``beta``
                (default branch only).
            hidden_size: Width of the upper-action embedding, critic and
                actor inputs.
            gate_hidden_size: Input width of ``self.upsilon``.
            task_embed_size: Width of the task embedding and task encoder.
            num_layers: Not used in this constructor (only in commented-out
                code).
            num_edges: Number of edges; overridden to 3 when ``olsk`` is set.
            num_encoding_layers: ``beta`` (default branch) has this many
                linear layers, the last one without an activation.
            debug: Stored on the instance.
            no_scan: Stored on the instance; affects ``beta`` sizing.
            no_roll: Stored on the instance; affects ``beta`` sizing.
            no_pointer: Stored on the instance; selects the GRUCell variant.
            transformer: When truthy, a ``TransformerModel`` replaces the
                bidirectional GRU task encoder.
            olsk: Stored on the instance; selects the GRUCell variant with
                exactly three edges.
            log_dir: Stored on the instance.
        """
        super().__init__()
        if olsk:
            num_edges = 3
        self.olsk = olsk
        self.no_pointer = no_pointer
        self.transformer = transformer
        self.log_dir = log_dir
        self.no_roll = no_roll
        self.no_scan = no_scan
        self.obs_spaces = observation_space
        self.action_size = action_space.nvec.size
        self.debug = debug
        self.hidden_size = hidden_size
        self.task_embed_size = task_embed_size

        # NOTE(review): `get_obs_sections`, `build_embed_task`, `d_space` and
        # `P_shape` are methods defined outside this block; they may depend
        # on attributes assigned above -- keep the statement order.
        self.obs_sections = self.get_obs_sections(self.obs_spaces)
        self.eval_lines = eval_lines
        self.train_lines = len(self.obs_spaces.lines.nvec)

        # networks
        self.ne = num_edges
        self.action_space_nvec = Action(*map(int, action_space.nvec))
        n_a = self.action_space_nvec.upper
        self.n_a = n_a
        self.embed_task = self.build_embed_task(task_embed_size)
        self.embed_upper = nn.Embedding(n_a, hidden_size)
        # Task encoder: transformer if requested, else a bidirectional GRU.
        self.task_encoder = (TransformerModel(
            ntoken=self.ne * self.d_space(),
            ninp=task_embed_size,
            nhid=task_embed_size,
        ) if transformer else nn.GRU(task_embed_size,
                                     task_embed_size,
                                     bidirectional=True,
                                     batch_first=True))
        # self.minimal_gru.py = nn.GRUCell(self.gru_in_size, gru_hidden_size)

        # layers = []
        # in_size = gru_hidden_size + 1
        # for _ in range(num_layers):
        # layers.extend([init_(nn.Linear(in_size, hidden_size)), activation])
        # in_size = hidden_size
        # self.zeta2 = nn.Sequential(*layers)
        # `upsilon` / `beta` take one of three forms depending on the flags.
        if self.olsk:
            assert self.ne == 3
            self.upsilon = nn.GRUCell(gate_hidden_size, hidden_size)
            self.beta = init_(nn.Linear(hidden_size, self.ne))
        elif self.no_pointer:
            self.upsilon = nn.GRUCell(gate_hidden_size, hidden_size)
            self.beta = init_(nn.Linear(hidden_size, self.d_space()))
        else:
            self.upsilon = init_(nn.Linear(gate_hidden_size, self.ne))
            layers = []
            # Input is twice the task embedding when either no_roll or
            # no_scan is set, otherwise a single task embedding.
            in_size = (2 if self.no_roll or self.no_scan else
                       1) * task_embed_size
            for _ in range(num_encoding_layers - 1):
                layers.extend(
                    [init_(nn.Linear(in_size, task_embed_size)), activation])
                in_size = task_embed_size
            out_size = self.ne * self.d_space() if self.no_scan else self.ne
            self.beta = nn.Sequential(*layers,
                                      init_(nn.Linear(in_size, out_size)))
        self.critic = init_(nn.Linear(hidden_size, 1))
        self.actor = Categorical(hidden_size, n_a)
        # Sizes of each field of the recurrent state.
        self.state_sizes = RecurrentState(
            a=1,
            a_probs=n_a,
            d=1,
            d_probs=(self.d_space()),
            h=hidden_size,
            p=1,
            v=1,
            P=(self.P_shape().prod()),
        )