def __init__(self, num_inputs, hidden_size, num_layers, recurrent, activation):
    assert num_layers > 0
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)
    if recurrent:
        num_inputs = hidden_size
    init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(x, 0))
    self.actor = nn.Sequential()
    self.critic = nn.Sequential()
    for i in range(num_layers):
        self.actor.add_module(
            name=f"fc{i}",
            module=nn.Sequential(
                init_(nn.Linear(num_inputs, hidden_size)), activation
            ),
        )
        self.critic.add_module(
            name=f"fc{i}",
            module=nn.Sequential(
                init_(nn.Linear(num_inputs, hidden_size)), activation
            ),
        )
        num_inputs = hidden_size
    self.critic_linear = init_(nn.Linear(num_inputs, 1))
    self.train()
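# Usage sketch (hypothetical sizes; assumes the base class's usual
# forward(inputs, rnn_hxs, masks) interface around these towers):
#
#     base = MLPBase(num_inputs=64, hidden_size=256, num_layers=2,
#                    recurrent=False, activation=nn.Tanh())
#     x = torch.randn(8, 64)
#     value = base.critic_linear(base.critic(x))  # shape: (8, 1)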
def __init__(self, d, h, w, activation, hidden_size, num_layers, recurrent=False):
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)
    self.main = nn.Sequential(
        init_(nn.Conv2d(d, hidden_size, kernel_size=1)),
        activation,
        *[
            nn.Sequential(
                init_(nn.Conv2d(hidden_size, hidden_size, kernel_size=1)),
                activation,
            )
            for _ in range(num_layers)
        ],
        # init_(nn.Conv2d(d, 32, 8, stride=4)), nn.ReLU(),
        # init_(nn.Conv2d(32, 64, kernel_size=4, stride=2)), nn.ReLU(),
        # init_(nn.Conv2d(32, 64, kernel_size=4, stride=2)), nn.ReLU(),
        # init_(nn.Conv2d(64, 32, kernel_size=3, stride=1)), activation,
        Flatten(),
        # init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())
        init_(nn.Linear(hidden_size * h * w, hidden_size)),
        activation,
    )
    self.critic_linear = init_(nn.Linear(hidden_size, 1))
    self.train()
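# Shape check (a sketch; sizes are hypothetical): every conv above uses
# kernel_size=1 with the default stride of 1, so spatial dimensions are
# preserved and Flatten() sees exactly hidden_size * h * w features:
#
#     base = CNNBase(d=3, h=5, w=5, activation=nn.ReLU(),
#                    hidden_size=32, num_layers=2)
#     out = base.main(torch.randn(4, 3, 5, 5))  # shape: (4, 32)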
def __init__(
    self,
    observation_space,
    action_space,
    activation,
    hidden_size,
    num_layers,
    num_edges,
    num_encoding_layers,
    debug,
    no_scan,
    no_roll,
):
    super().__init__()
    self.no_roll = no_roll
    self.no_scan = no_scan
    self.obs_spaces = Obs(**observation_space.spaces)
    self.obs_sections = Obs(*[int(np.prod(s.shape)) for s in self.obs_spaces])
    self.action_size = 2
    self.debug = debug
    self.hidden_size = hidden_size

    # networks
    self.ne = num_edges
    nt = int(self.obs_spaces.lines.nvec[0])
    n_a, n_p = map(int, action_space.nvec)
    self.n_a = n_a
    self.embed_task = nn.Embedding(nt, hidden_size)
    self.embed_action = nn.Embedding(n_a, hidden_size)
    self.task_encoder = nn.GRU(
        hidden_size, hidden_size, bidirectional=True, batch_first=True
    )
    in_size = self.obs_sections.condition + hidden_size * 2
    self.gru = nn.GRUCell(in_size, hidden_size)
    layers = []
    for _ in range(num_layers):
        layers.extend([init_(nn.Linear(hidden_size, hidden_size)), activation])
    self.mlp = nn.Sequential(*layers)
    self.option = init_(nn.Linear(hidden_size, self.ne))
    layers = []
    in_size = hidden_size
    for _ in range(num_encoding_layers - 1):
        layers.extend([init_(nn.Linear(in_size, hidden_size)), activation])
        in_size = hidden_size
    self.mlp2 = nn.Sequential(*layers, init_(nn.Linear(in_size, self.ne)))
    self.stuff = init_(nn.Linear(hidden_size, 1))
    self.critic = init_(nn.Linear(hidden_size, 1))
    self.actor = Categorical(hidden_size, n_a)
    self.attention = Categorical(hidden_size, n_a)
    self.state_sizes = RecurrentState(a=1, a_probs=n_a, v=1, h=hidden_size)
    first = torch.zeros(1, 1, 2 * self.obs_sections.lines, 1)
    first[0, 0, 0] = 1
    self.register_buffer("first", first)
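# Why `hidden_size * 2` above (a sketch with hypothetical sizes): the task
# encoder is bidirectional, so its per-step output concatenates the forward
# and backward hidden states:
#
#     enc = nn.GRU(64, 64, bidirectional=True, batch_first=True)
#     out, _ = enc(torch.randn(1, 10, 64))  # out.shape == (1, 10, 128)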
def __init__(
    self,
    observation_space,
    action_space,
    hidden_size,
    task_embed_size,
    conv_hidden_size,
    num_layers,
    entropy_coef,
    stride,
    kernel_size,
    lower_embed_size,
    **network_args,
):
    self.obs_spaces = Obs(**observation_space.spaces)
    nn.Module.__init__(self)
    abstract_recurrence.Recurrence.__init__(self)
    self.action_size = action_space.nvec.size
    self.entropy_coef = entropy_coef
    self.hidden_size = hidden_size
    self.task_embed_size = task_embed_size
    self.obs_sections = Obs(*get_obs_sections(self.obs_spaces))
    self.train_lines = len(self.obs_spaces.lines.nvec)

    # networks
    n_a = int(action_space.nvec[0])
    self.embed_task = self.build_embed_task(hidden_size)
    self.embed_action = nn.Embedding(n_a, hidden_size)
    self.critic = init_(nn.Linear(hidden_size, 1))
    # the raw observation_space is a Dict space; shapes live on the Obs
    # namedtuple built from it above
    d, h, w = self.obs_spaces.obs.shape
    padding = optimal_padding(kernel_size, stride)
    self.conv = nn.Conv2d(
        in_channels=d,
        out_channels=conv_hidden_size,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
    )
    self.embed_lower = nn.Embedding(
        self.action_space_nvec.lower + 1, lower_embed_size
    )
    self.dist = Categorical(hidden_size, n_a)
    network_args.update(recurrent=True, activation=nn.ReLU())
    self.recurrent_module = MLPBase(
        num_inputs=conv_hidden_size + self.train_lines * hidden_size,
        hidden_size=hidden_size,
        num_layers=num_layers + 1,
        **network_args,
    )
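# Input-size sketch for the recurrent module (hypothetical numbers): its
# input concatenates the conv features with one hidden_size-dim embedding
# per instruction line, e.g. conv_hidden_size=64, train_lines=10, and
# hidden_size=128 give num_inputs = 64 + 10 * 128 = 1344.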
def __init__(
    self,
    d,
    h,
    w,
    line_nvec,
    conv_layers,
    conv_kernels,
    conv_strides,
    pool_type,
    pool_kernels,
    pool_strides,
    action_size,
    line_hidden_size,
    lower_hidden_size,
    concat,
):
    super().__init__()
    self.concat = concat
    if not concat:
        hidden_size = lower_hidden_size

    def remove_none(xs):
        return [x for x in xs if x is not None]

    conv_layers = remove_none(conv_layers)
    conv_kernels = remove_none(conv_kernels)
    conv_strides = remove_none(conv_strides)
    pool_kernels = remove_none(pool_kernels)
    pool_strides = remove_none(pool_strides)

    # each sub-generator yields (spatial size, module) and receives the
    # caller's updated spatial size back through send()
    def generate_pools(k):
        for (kernel, stride) in zip(pool_kernels, pool_strides):
            kernel = min(k, kernel)
            padding = (kernel // 2) % stride
            if pool_type == "avg":
                pool = nn.AvgPool2d(kernel_size=kernel, stride=stride, padding=padding)
            elif pool_type == "max":
                pool = nn.MaxPool2d(kernel_size=kernel, stride=stride, padding=padding)
            else:
                raise RuntimeError(f"invalid pool_type: {pool_type}")
            k = int((k + 2 * padding - kernel) / stride + 1)
            k = yield k, pool

    def generate_convolutions(k):
        in_size = d
        for (layer, kernel, stride) in zip(conv_layers, conv_kernels, conv_strides):
            kernel = min(k, kernel)
            padding = (kernel // 2) % stride
            conv = init_(
                nn.Conv2d(
                    in_channels=in_size,
                    out_channels=layer,
                    kernel_size=kernel,
                    stride=stride,
                    padding=padding,
                )
            )
            k = int((k + (2 * padding) - (kernel - 1) - 1) // stride + 1)
            k = yield k, conv
            in_size = layer

    def generate_modules(k):
        # interleave pools with the last n_pools convolutions, threading the
        # running spatial size k between the two generators
        n_pools = min(len(pool_strides), len(pool_kernels))
        n_conv = min(len(conv_layers), len(conv_strides), len(conv_kernels))
        conv_iterator = generate_convolutions(k)
        try:
            k, conv = next(conv_iterator)
            yield conv
            pool_iterator = None
            for i in itertools.count():
                if pool_iterator is None:
                    if i >= n_conv - n_pools and pool_type is not None:
                        pool_iterator = generate_pools(k)
                        k, pool = next(pool_iterator)
                        yield pool
                else:
                    k, pool = pool_iterator.send(k)
                    yield pool
                k, conv = conv_iterator.send(k)
                yield conv
        except StopIteration:
            pass
        out_size = k ** 2 * conv.out_channels
        yield Flatten(out_size=out_size)
        if not concat:
            yield init_(nn.Linear(out_size, hidden_size))

    *obs_modules, flatten = generate_modules(h)
    self.conv = nn.Sequential(*obs_modules, flatten)
    if not concat:
        line_hidden_size = hidden_size
        lower_hidden_size = hidden_size
    offset = F.pad(line_nvec.cumsum(0), [1, 0])
    self.register_buffer("offset", offset)
    self.embed_line = nn.EmbeddingBag(line_nvec.sum(), line_hidden_size)
    self.embed_lower = nn.Embedding(action_size, lower_hidden_size)
    self.out = init_(
        nn.Linear(
            flatten.out_size + line_hidden_size + lower_hidden_size
            if concat
            else hidden_size,
            1,
        )
    )
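# Worked size example (a sketch; all numbers hypothetical): with k=8,
# kernel=3, stride=2, the padding rule gives padding = (3 // 2) % 2 = 1,
# and the conv output size is
#     k = (8 + 2 * 1 - (3 - 1) - 1) // 2 + 1 = 4,
# which is the value sent back into generate_convolutions for the next layer.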
def __init__(
    self,
    hidden2,
    hidden_size,
    conv_hidden_size,
    fuzz,
    critic_type,
    gate_hidden_size,
    gate_conv_kernel_size,
    gate_coef,
    gate_stride,
    observation_space,
    lower_level_load_path,
    lower_embed_size,
    kernel_size,
    stride,
    action_space,
    lower_level_config,
    task_embed_size,
    num_edges,
    **kwargs,
):
    self.critic_type = critic_type
    self.fuzz = fuzz
    self.gate_coef = gate_coef
    self.conv_hidden_size = conv_hidden_size
    self.kernel_size = kernel_size
    self.stride = stride
    self.gate_hidden_size = gate_hidden_size
    self.gate_kernel_size = gate_conv_kernel_size
    self.gate_stride = gate_stride
    observation_space = Obs(**observation_space.spaces)
    recurrence.Recurrence.__init__(
        self,
        hidden_size=hidden_size,
        gate_hidden_size=gate_hidden_size,
        task_embed_size=task_embed_size,
        observation_space=observation_space,
        action_space=action_space,
        num_edges=num_edges,
        **kwargs,
    )
    abstract_recurrence.Recurrence.__init__(self)
    d, h, w = observation_space.obs.shape
    self.kernel_size = min(d, kernel_size)
    padding = optimal_padding(h, kernel_size, stride) + 1
    self.conv = nn.Conv2d(
        in_channels=d,
        out_channels=conv_hidden_size,
        kernel_size=self.kernel_size,
        stride=stride,
        padding=padding,
    )
    self.embed_lower = nn.Embedding(
        self.action_space_nvec.lower + 1, lower_embed_size
    )
    inventory_size = self.obs_spaces.inventory.n
    inventory_hidden_size = gate_hidden_size
    self.embed_inventory = nn.Sequential(
        init_(nn.Linear(inventory_size, inventory_hidden_size)), nn.ReLU()
    )
    m_size = (
        2 * self.task_embed_size + hidden_size
        if self.no_pointer
        else self.task_embed_size
    )
    self.zeta = init_(
        nn.Linear(conv_hidden_size + m_size + inventory_hidden_size, hidden_size)
    )
    output_dim = conv_output_dimension(
        h=h, padding=padding, kernel=kernel_size, stride=stride
    )
    self.gate_padding = optimal_padding(h, gate_conv_kernel_size, gate_stride)
    output_dim2 = conv_output_dimension(
        h=output_dim,
        padding=self.gate_padding,
        kernel=self.gate_kernel_size,
        stride=self.gate_stride,
    )
    z2_size = m_size + hidden2 + gate_hidden_size * output_dim2 ** 2
    self.d_gate = Categorical(z2_size, 2)
    self.linear1 = nn.Linear(
        m_size, conv_hidden_size * gate_conv_kernel_size ** 2 * gate_hidden_size
    )
    self.conv_bias = nn.Parameter(torch.zeros(gate_hidden_size))
    self.linear2 = nn.Linear(m_size + lower_embed_size, hidden2)
    if self.critic_type == "z":
        self.critic = init_(nn.Linear(hidden_size, 1))
    elif self.critic_type == "h1":
        self.critic = init_(nn.Linear(gate_hidden_size * output_dim2 ** 2, 1))
    elif self.critic_type == "z3":
        self.critic = init_(nn.Linear(gate_hidden_size, 1))
    elif self.critic_type == "combined":
        self.critic = init_(nn.Linear(hidden_size + z2_size, 1))
    elif self.critic_type == "multi-layer":
        self.critic = nn.Sequential(
            init_(nn.Linear(hidden_size + z2_size, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, 1)),
        )
    state_sizes = self.state_sizes._asdict()
    with lower_level_config.open() as f:
        lower_level_params = json.load(f)
    ll_action_space = spaces.Discrete(Action(*action_space.nvec).lower)
    self.state_sizes = RecurrentState(
        **state_sizes,
        dg_probs=2,
        dg=1,
        l=1,
        l_probs=ll_action_space.n,
        lh=lower_level_params["hidden_size"],
    )
    self.lower_level = Agent(
        obs_spaces=observation_space,
        entropy_coef=0,
        action_space=ll_action_space,
        lower_level=True,
        num_layers=1,
        **lower_level_params,
    )
    if lower_level_load_path is not None:
        state_dict = torch.load(lower_level_load_path, map_location="cpu")
        self.lower_level.load_state_dict(state_dict["agent"])
        print(f"Loaded lower_level from {lower_level_load_path}.")
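# Sizing note (an inference from the layer shapes above, not confirmed by
# this file): linear1 maps the m_size-dim task encoding to exactly
# conv_hidden_size * K^2 * gate_hidden_size numbers, i.e. one
# (gate_hidden_size, conv_hidden_size, K, K) weight tensor, which suggests
# the forward pass reshapes it and applies F.conv2d with conv_bias as a
# task-conditioned gate convolution.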
def __init__(
    self,
    hidden_size,
    num_layers,
    recurrent,
    obs_space,
    num_conv_layers,
    kernel_size,
    stride,
    activation=nn.ReLU(),
    **_,
):
    if type(obs_space) is spaces.Dict:
        obs_space = Obs(**obs_space.spaces)
    assert num_layers > 0
    H = hidden_size
    super().__init__(
        recurrent=recurrent, recurrent_input_size=H, hidden_size=hidden_size
    )
    self.register_buffer(
        "subtasks",
        torch.tensor(
            [Env.preprocess_line(Subtask(s)) for s in subtasks()] + [[0, 0, 0, 0]]
        ),
    )
    (d, h, w) = obs_space.obs.shape
    inventory_size = obs_space.inventory.n
    line_nvec = torch.tensor(obs_space.lines.nvec)
    offset = F.pad(line_nvec[0, :-1].cumsum(0), [1, 0])
    self.register_buffer("offset", offset)
    self.obs_spaces = obs_space
    self.obs_sections = get_obs_sections(self.obs_spaces)
    padding = (kernel_size // 2) % stride
    self.conv = nn.Sequential()
    in_size = d
    assert num_conv_layers > 0
    for i in range(num_conv_layers):
        self.conv.add_module(
            name=f"conv{i}",
            module=nn.Sequential(
                init_(
                    nn.Conv2d(
                        in_size,
                        hidden_size,
                        kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                    )
                ),
                activation,
            ),
        )
        in_size = hidden_size
        h = w = (h + (2 * padding) - (kernel_size - 1) - 1) // stride + 1
        kernel_size = min(h, kernel_size)
    self.conv.add_module(name="flatten", module=Flatten())
    init2 = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(x, 0))
    self.conv_projection = nn.Sequential(
        init2(nn.Linear(h * w * hidden_size, hidden_size)), activation
    )
    self.line_embed = nn.EmbeddingBag(line_nvec[0].sum(), hidden_size)
    self.inventory_embed = nn.Sequential(
        init2(nn.Linear(inventory_size, hidden_size)), activation
    )
    self.mlp = nn.Sequential()
    in_size = hidden_size if recurrent else H
    for i in range(num_layers):
        self.mlp.add_module(
            name=f"fc{i}",
            module=nn.Sequential(init2(nn.Linear(in_size, hidden_size)), activation),
        )
        in_size = hidden_size
    self.critic_linear = init2(nn.Linear(in_size, 1))
    self._output_size = in_size
    self.train()
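# Size-tracking sketch (hypothetical numbers): with h=7, kernel_size=3, and
# stride=1, padding = (3 // 2) % 1 = 0, so each layer shrinks the map as
#     h = (7 + 0 - 2 - 1) // 1 + 1 = 5, then 3, then 1,
# and kernel_size = min(h, kernel_size) keeps the kernel no larger than the
# remaining feature map.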
def __init__(
    self,
    observation_space,
    action_space,
    eval_lines,
    activation,
    hidden_size,
    gate_hidden_size,
    task_embed_size,
    num_layers,
    num_edges,
    num_encoding_layers,
    debug,
    no_scan,
    no_roll,
    no_pointer,
    transformer,
    olsk,
    log_dir,
):
    super().__init__()
    if olsk:
        num_edges = 3
    self.olsk = olsk
    self.no_pointer = no_pointer
    self.transformer = transformer
    self.log_dir = log_dir
    self.no_roll = no_roll
    self.no_scan = no_scan
    self.obs_spaces = observation_space
    self.action_size = action_space.nvec.size
    self.debug = debug
    self.hidden_size = hidden_size
    self.task_embed_size = task_embed_size
    self.obs_sections = self.get_obs_sections(self.obs_spaces)
    self.eval_lines = eval_lines
    self.train_lines = len(self.obs_spaces.lines.nvec)

    # networks
    self.ne = num_edges
    self.action_space_nvec = Action(*map(int, action_space.nvec))
    n_a = self.action_space_nvec.upper
    self.n_a = n_a
    self.embed_task = self.build_embed_task(task_embed_size)
    self.embed_upper = nn.Embedding(n_a, hidden_size)
    self.task_encoder = (
        TransformerModel(
            ntoken=self.ne * self.d_space(),
            ninp=task_embed_size,
            nhid=task_embed_size,
        )
        if transformer
        else nn.GRU(
            task_embed_size, task_embed_size, bidirectional=True, batch_first=True
        )
    )
    # self.minimal_gru.py = nn.GRUCell(self.gru_in_size, gru_hidden_size)
    # layers = []
    # in_size = gru_hidden_size + 1
    # for _ in range(num_layers):
    #     layers.extend([init_(nn.Linear(in_size, hidden_size)), activation])
    #     in_size = hidden_size
    # self.zeta2 = nn.Sequential(*layers)
    if self.olsk:
        assert self.ne == 3
        self.upsilon = nn.GRUCell(gate_hidden_size, hidden_size)
        self.beta = init_(nn.Linear(hidden_size, self.ne))
    elif self.no_pointer:
        self.upsilon = nn.GRUCell(gate_hidden_size, hidden_size)
        self.beta = init_(nn.Linear(hidden_size, self.d_space()))
    else:
        self.upsilon = init_(nn.Linear(gate_hidden_size, self.ne))
        layers = []
        in_size = (2 if self.no_roll or self.no_scan else 1) * task_embed_size
        for _ in range(num_encoding_layers - 1):
            layers.extend([init_(nn.Linear(in_size, task_embed_size)), activation])
            in_size = task_embed_size
        out_size = self.ne * self.d_space() if self.no_scan else self.ne
        self.beta = nn.Sequential(*layers, init_(nn.Linear(in_size, out_size)))
    self.critic = init_(nn.Linear(hidden_size, 1))
    self.actor = Categorical(hidden_size, n_a)
    self.state_sizes = RecurrentState(
        a=1,
        a_probs=n_a,
        d=1,
        d_probs=self.d_space(),
        h=hidden_size,
        p=1,
        v=1,
        P=self.P_shape().prod(),
    )
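# Flat-state sketch (hypothetical sizes; assumes RecurrentState is a
# namedtuple of per-component widths consumed by the recurrence loop): with
# n_a=5, d_space()=7, hidden_size=64, and P_shape().prod()=14, the flattened
# recurrent state is 1 + 5 + 1 + 7 + 64 + 1 + 1 + 14 = 94 numbers per agent.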