def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
    """Assemble the perception blocks and the inference block of the value network.

    :param obs_shapes: The shapes of all observations as a dict. The entries 'ordered_piece' and
        'selected_piece' are used as network inputs.
    :param non_lin: The nonlinear activation to be used.
    """
    nn.Module.__init__(self)
    self.obs_shapes = obs_shapes

    hidden_units = 32

    self.perception_dict = OrderedDict()

    # one dense feature block per input observation
    self.perception_dict['order_feat'] = DenseBlock(
        in_keys='ordered_piece', out_keys='order_feat',
        in_shapes=self.obs_shapes['ordered_piece'],
        hidden_units=[hidden_units], non_lin=non_lin)

    self.perception_dict['selected_feat'] = DenseBlock(
        in_keys='selected_piece', out_keys='selected_feat',
        in_shapes=self.obs_shapes['selected_piece'],
        hidden_units=[hidden_units], non_lin=non_lin)

    # in_shapes lists exactly one shape per entry of in_keys
    self.perception_dict['latent'] = ConcatenationBlock(
        in_keys=['order_feat', 'selected_feat'], out_keys='latent',
        in_shapes=[[hidden_units], [hidden_units]], concat_dim=-1)

    self.perception_dict['value'] = LinearOutputBlock(
        in_keys='latent', out_keys='value',
        in_shapes=self.perception_dict['latent'].out_shapes(), output_units=1)

    in_keys = ['ordered_piece', 'selected_piece']
    self.perception_net = InferenceBlock(
        in_keys=in_keys, out_keys='value',
        in_shapes=[self.obs_shapes[key] for key in in_keys],
        perception_blocks=self.perception_dict)

    # initialize model weights: whole net first, then the value block again with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    self.perception_dict['value'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: Union[str, type(nn.Module)]):
    """Build a dense embedding of 'observation' followed by one linear action head.

    Only the first key of ``action_logits_shapes`` is turned into an action head.

    :param obs_shapes: The shapes of all observations as a dict; the entry 'observation' is used.
    :param action_logits_shapes: The shapes of all action logits as a dict.
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__()
    self.obs_shapes = obs_shapes
    action_key = list(action_logits_shapes)[0]

    blocks = OrderedDict()
    self.perception_dict = blocks

    # perception part
    blocks['embedding'] = DenseBlock(
        in_keys="observation",
        out_keys="embedding",
        in_shapes=obs_shapes['observation'],
        hidden_units=[256, 256],
        non_lin=non_lin)

    # action head; the number of output units is the first entry of the logits shape
    num_logits = action_logits_shapes[action_key][0]
    blocks[action_key] = LinearOutputBlock(
        in_keys="embedding",
        out_keys=action_key,
        in_shapes=blocks['embedding'].out_shapes(),
        output_units=num_logits)

    self.perception_net = InferenceBlock(
        in_keys='observation',
        out_keys=action_key,
        in_shapes=[self.obs_shapes['observation']],
        perception_blocks=blocks)

    # weight initialization: whole net with 1.0, action head afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks[action_key].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: type(nn.Module)):
    """Extend the base network with one dense net plus linear output block per action head.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__(obs_shapes, non_lin)

    for head_name in action_logits_shapes.keys():
        # layer sizes are derived from the first entry of the logits shape
        num_logits = action_logits_shapes[head_name][0]
        net_key = f'{head_name}_net'
        head_key = f'{head_name}'

        self.perception_dict[net_key] = DenseBlock(
            in_keys='hidden_out',
            in_shapes=self.perception_dict['hidden_out'].out_shapes(),
            out_keys=net_key,
            hidden_units=[num_logits * 5, num_logits * 2],
            non_lin=non_lin)

        self.perception_dict[head_key] = LinearOutputBlock(
            in_keys=net_key,
            in_shapes=self.perception_dict[net_key].out_shapes(),
            out_keys=head_name,
            output_units=num_logits)

    # Set up inference block
    obs_keys = list(self.obs_shapes.keys())
    self.perception_net = InferenceBlock(
        in_keys=obs_keys,
        out_keys=list(action_logits_shapes.keys()),
        in_shapes=[self.obs_shapes[key] for key in obs_keys],
        perception_blocks=self.perception_dict)

    # initialize the whole net first, then every action head output block again with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    for head_name in action_logits_shapes.keys():
        self.perception_dict[f'{head_name}'].apply(make_module_init_normc(0.01))
class QCriticNetContinuous(nn.Module):
    """Simple Q critic for mixed action heads (that is not all discrete).

    The network produces a single 'q_value' output from the inputs 'observation' and 'action'.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_spaces_dict: Dict[Union[str, int], spaces.Space],
                 non_lin: Union[str, type(nn.Module)]):
        """Build the perception blocks and the inference block.

        :param obs_shapes: The shapes of all inputs as a dict; the entries 'observation' and
            'action' are used.
        :param action_spaces_dict: The action spaces dict (not read in this constructor).
        :param non_lin: The nonlinear activation to be used.
        """
        super().__init__()
        self.obs_shapes = obs_shapes

        blocks = OrderedDict()
        self.perception_dict = blocks

        # separate dense embeddings for observation and action
        blocks['latent-obs'] = DenseBlock(
            in_keys="observation", out_keys="latent-obs",
            in_shapes=obs_shapes['observation'],
            hidden_units=[256], non_lin=non_lin)
        blocks['latent-act'] = DenseBlock(
            in_keys="action", out_keys="latent-act",
            in_shapes=obs_shapes['action'],
            hidden_units=[256], non_lin=non_lin)

        # join both embeddings along the last dimension
        concat_in_shapes = blocks['latent-obs'].out_shapes() + blocks['latent-act'].out_shapes()
        blocks['concat'] = ConcatenationBlock(
            in_keys=['latent-obs', 'latent-act'],
            in_shapes=concat_in_shapes,
            concat_dim=-1,
            out_keys='concat')

        blocks['latent'] = DenseBlock(
            in_keys="concat", out_keys="latent",
            in_shapes=blocks['concat'].out_shapes(),
            hidden_units=[256], non_lin=non_lin)

        # q value output
        blocks['q_value'] = LinearOutputBlock(
            in_keys="latent", out_keys="q_value",
            in_shapes=blocks['latent'].out_shapes(),
            output_units=1)

        self.perception_net = InferenceBlock(
            in_keys=['observation', 'action'],
            out_keys='q_value',
            in_shapes=[self.obs_shapes['observation'], self.obs_shapes['action']],
            perception_blocks=blocks)

        # weight initialization: whole net with 1.0, q value block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        blocks['q_value'].apply(make_module_init_normc(0.01))

    def forward(self, x):
        """Run the forward pass.

        :param x: The input passed on to the inference block.
        :return: The output of the inference block.
        """
        return self.perception_net(x)
def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: type(nn.Module)):
    """Extend the base network with a dense value head net and a single-unit output block.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__(obs_shapes, non_lin)

    blocks = self.perception_dict

    blocks['value_head_net'] = DenseBlock(
        in_keys='hidden_out',
        in_shapes=blocks['hidden_out'].out_shapes(),
        out_keys='value_head_net',
        hidden_units=[5, 2],
        non_lin=non_lin)

    blocks['value'] = LinearOutputBlock(
        in_keys='value_head_net',
        in_shapes=blocks['value_head_net'].out_shapes(),
        out_keys='value',
        output_units=1)

    # Set up inference block over all observations
    obs_keys = list(self.obs_shapes.keys())
    self.perception_net = InferenceBlock(
        in_keys=obs_keys,
        out_keys='value',
        in_shapes=[self.obs_shapes[key] for key in obs_keys],
        perception_blocks=self.perception_dict)

    # initialize model weights: whole net with 1.0, value block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    self.perception_dict['value'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)],
             hidden_units: List[int]):
    """Initialize both base classes and add the 'value' output on top of the 'latent' block.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """
    nn.Module.__init__(self)
    CustomComplexLatentNet.__init__(self, obs_shapes, non_lin, hidden_units)

    # value output block fed by the latent block
    latent_shapes = self.perception_dict['latent'].out_shapes()
    self.perception_dict['value'] = LinearOutputBlock(
        in_keys='latent', out_keys='value', in_shapes=latent_shapes, output_units=1)

    # inference block over all observations
    obs_keys = list(self.obs_shapes.keys())
    obs_key_shapes = [self.obs_shapes[key] for key in obs_keys]
    self.perception_net = InferenceBlock(
        in_keys=obs_keys, out_keys='value', in_shapes=obs_key_shapes,
        perception_blocks=self.perception_dict)

    # weight init: whole net with 1.0, value block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    self.perception_dict['value'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
    """Build a dense latent block on 'observation' and a linear 'action' output block.

    :param obs_shapes: The shapes of all observations as a dict; the entry 'observation' is used.
    :param action_logits_shapes: The shapes of all actions as a dict structure; the entry
        'action' is used.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """
    super().__init__()

    # Maze relies on dictionaries to represent the inference graph
    blocks = OrderedDict()
    self.perception_dict = blocks

    # latent embedding
    blocks['latent'] = DenseBlock(
        in_keys='observation', out_keys='latent',
        in_shapes=obs_shapes['observation'],
        hidden_units=hidden_units, non_lin=non_lin)

    # action head: one output unit per element of the logits shape
    num_logits = int(np.prod(action_logits_shapes["action"]))
    blocks['action'] = LinearOutputBlock(
        in_keys='latent', out_keys='action',
        in_shapes=blocks['latent'].out_shapes(),
        output_units=num_logits)

    # inference block
    self.perception_net = InferenceBlock(
        in_keys='observation', out_keys='action',
        in_shapes=obs_shapes['observation'],
        perception_blocks=blocks)

    # weight init: whole net with 1.0, action block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks['action'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
    """Initialize both base classes and add one linear output block per action.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """
    nn.Module.__init__(self)
    CustomComplexLatentNet.__init__(self, obs_shapes, non_lin, hidden_units)

    # action heads, each fed by the 'latent' block
    for action_key, action_shape in action_logits_shapes.items():
        num_logits = int(np.prod(action_shape))
        self.perception_dict[action_key] = LinearOutputBlock(
            in_keys='latent', out_keys=action_key,
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=num_logits)

    # inference block
    obs_keys = list(self.obs_shapes.keys())
    # 'latent_screen' is listed as an additional out_key so that it is returned by the forward
    # method and can be reused by the critic network.
    net_out_keys = list(action_logits_shapes.keys()) + ['latent_screen']
    self.perception_net = InferenceBlock(
        in_keys=obs_keys,
        out_keys=net_out_keys,
        perception_blocks=self.perception_dict,
        in_shapes=[self.obs_shapes[key] for key in obs_keys])

    # weight init: whole net with 1.0, every action block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    for action_key in action_logits_shapes.keys():
        self.perception_dict[action_key].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)],
             hidden_units: List[int]):
    """Build a critic that combines 'observation_inventory' with an externally provided 'latent_screen'.

    :param obs_shapes: The shapes of all inputs as a dict; the entries 'observation_inventory' and
        'latent_screen' are read explicitly and all keys become inputs of the inference block.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer of the 'latent_dense' block.
    """
    nn.Module.__init__(self)

    # Maze relies on dictionaries to represent the inference graph
    blocks = OrderedDict()
    self.perception_dict = blocks

    # latent feature embedding of the inventory observation
    blocks['latent_inventory'] = DenseBlock(
        in_keys='observation_inventory', out_keys='latent_inventory',
        in_shapes=obs_shapes['observation_inventory'],
        hidden_units=[128], non_lin=non_lin)

    # concatenate the inventory embedding with the 'latent_screen' input
    concat_in_shapes = blocks['latent_inventory'].out_shapes() + [obs_shapes['latent_screen']]
    blocks['latent_concat'] = ConcatenationBlock(
        in_keys=['latent_inventory', 'latent_screen'], out_keys='latent_concat',
        in_shapes=concat_in_shapes, concat_dim=-1)

    # dense block on the concatenated features
    blocks['latent_dense'] = DenseBlock(
        in_keys='latent_concat', out_keys='latent_dense',
        hidden_units=hidden_units, non_lin=non_lin,
        in_shapes=blocks['latent_concat'].out_shapes())

    # recurrent block
    blocks['latent'] = LSTMLastStepBlock(
        in_keys='latent_dense', out_keys='latent',
        in_shapes=blocks['latent_dense'].out_shapes(),
        hidden_size=32, num_layers=1, bidirectional=False, non_lin=non_lin)

    # value output block
    blocks['value'] = LinearOutputBlock(
        in_keys='latent', out_keys='value',
        in_shapes=blocks['latent'].out_shapes(), output_units=1)

    # inference block over all entries of obs_shapes
    input_keys = list(obs_shapes.keys())
    self.perception_net = InferenceBlock(
        in_keys=input_keys, out_keys='value',
        in_shapes=[obs_shapes[key] for key in input_keys],
        perception_blocks=blocks)

    # weight init: whole net with 1.0, value block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks['value'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
    """Build the value network from the four cart pole observations.

    :param obs_shapes: The shapes of all observations as a dict; the entries 'cart_position',
        'cart_velocity', 'pole_angle' and 'pole_angular_velocity' are used.
    :param non_lin: The nonlinear activation to be used.
    """
    nn.Module.__init__(self)

    # the observations fed into the network, in concatenation order
    obs_keys = ['cart_position', 'cart_velocity', 'pole_angle', 'pole_angular_velocity']

    # initialize the perception dictionary
    blocks = OrderedDict()
    self.perception_dict = blocks

    # concatenate all observations along the last dimension
    blocks['concat'] = ConcatenationBlock(
        in_keys=list(obs_keys), out_keys='concat',
        in_shapes=[obs_shapes[key] for key in obs_keys],
        concat_dim=-1)

    # process concatenated representation with two dense layers
    blocks['embedding'] = DenseBlock(
        in_keys='concat', in_shapes=blocks['concat'].out_shapes(),
        hidden_units=[128, 128], non_lin=non_lin, out_keys='embedding')

    # add a linear output block
    blocks['value'] = LinearOutputBlock(
        in_keys='embedding', out_keys='value',
        in_shapes=blocks['embedding'].out_shapes(), output_units=1)

    # compile an inference block
    self.perception_net = InferenceBlock(
        in_keys=list(obs_keys), out_keys='value',
        in_shapes=[obs_shapes[key] for key in obs_keys],
        perception_blocks=blocks)

    # initialize model weights: whole net with 1.0, value block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks['value'].apply(make_module_init_normc(0.01))
def __init__(self, obs_shapes: Dict[str, Sequence[int]],
             action_spaces_dict: Dict[Union[str, int], spaces.Space],
             non_lin: Union[str, type(nn.Module)]):
    """Build a critic that maps 'observation' and 'action' to a single 'q_value' output.

    :param obs_shapes: The shapes of all inputs as a dict; the entries 'observation' and
        'action' are used.
    :param action_spaces_dict: The action spaces dict (not read in this constructor).
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__()
    self.obs_shapes = obs_shapes

    blocks = OrderedDict()
    self.perception_dict = blocks

    # separate dense embeddings for observation and action
    blocks['latent-obs'] = DenseBlock(
        in_keys="observation", out_keys="latent-obs",
        in_shapes=obs_shapes['observation'],
        hidden_units=[256], non_lin=non_lin)
    blocks['latent-act'] = DenseBlock(
        in_keys="action", out_keys="latent-act",
        in_shapes=obs_shapes['action'],
        hidden_units=[256], non_lin=non_lin)

    # join both embeddings along the last dimension
    concat_in_shapes = blocks['latent-obs'].out_shapes() + blocks['latent-act'].out_shapes()
    blocks['concat'] = ConcatenationBlock(
        in_keys=['latent-obs', 'latent-act'],
        in_shapes=concat_in_shapes,
        concat_dim=-1,
        out_keys='concat')

    blocks['latent'] = DenseBlock(
        in_keys="concat", out_keys="latent",
        in_shapes=blocks['concat'].out_shapes(),
        hidden_units=[256], non_lin=non_lin)

    # q value output
    blocks['q_value'] = LinearOutputBlock(
        in_keys="latent", out_keys="q_value",
        in_shapes=blocks['latent'].out_shapes(),
        output_units=1)

    self.perception_net = InferenceBlock(
        in_keys=['observation', 'action'],
        out_keys='q_value',
        in_shapes=[self.obs_shapes['observation'], self.obs_shapes['action']],
        perception_blocks=blocks)

    # weight initialization: whole net with 1.0, q value block afterwards with 0.01
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks['q_value'].apply(make_module_init_normc(0.01))
class CustomSharedComplexPolicyNet(nn.Module, CustomComplexLatentNet):
    """Policy network adding one linear output per action on top of the shared latent blocks.

    Besides the action outputs, the network also returns 'latent_screen'.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin, hidden_units)

        # action heads, each fed by the 'latent' block
        for action_key, action_shape in action_logits_shapes.items():
            num_logits = int(np.prod(action_shape))
            self.perception_dict[action_key] = LinearOutputBlock(
                in_keys='latent', out_keys=action_key,
                in_shapes=self.perception_dict['latent'].out_shapes(),
                output_units=num_logits)

        # inference block
        obs_keys = list(self.obs_shapes.keys())
        # 'latent_screen' is listed as an additional out_key so that it is returned by the forward
        # method and can be reused by the critic network.
        net_out_keys = list(action_logits_shapes.keys()) + ['latent_screen']
        self.perception_net = InferenceBlock(
            in_keys=obs_keys,
            out_keys=net_out_keys,
            perception_blocks=self.perception_dict,
            in_shapes=[self.obs_shapes[key] for key in obs_keys])

        # weight init: whole net with 1.0, every action block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        for action_key in action_logits_shapes.keys():
            self.perception_dict[action_key].apply(make_module_init_normc(0.01))

    def forward(self, in_tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        out_tensor_dict = self.perception_net(in_tensor_dict)
        return out_tensor_dict
def __init__(self, obs_shapes: Dict[str, Sequence[int]], hidden_units: List[int], non_lin: nn.Module,
             support_range: Tuple[int, int]):
    """Add a categorical value head ('probabilities') and a derived scalar 'value' output.

    :param obs_shapes: The shapes of all observations as a dict.
    :param hidden_units: A list of units per hidden layer (passed on to the base class).
    :param non_lin: The nonlinear activation to be used.
    :param support_range: Tuple of the first and last support value; the head gets one output
        unit per integer in this inclusive range.
    """
    super().__init__(obs_shapes, hidden_units, non_lin)

    # categorical value head with one unit per support value (both range ends included)
    range_start, range_end = support_range[0], support_range[1]
    num_supports = range_end - range_start + 1
    self.perception_dict["probabilities"] = LinearOutputBlock(
        in_keys="latent", out_keys="probabilities",
        in_shapes=self.perception_dict["latent"].out_shapes(),
        output_units=num_supports)

    def _to_scalar(x: torch.Tensor) -> torch.Tensor:
        """Convert the head output to a scalar via support_to_scalar with the given range."""
        return support_to_scalar(x, support_range=support_range)

    # scalar value computed from the 'probabilities' output
    self.perception_dict["value"] = FunctionalBlock(
        in_keys="probabilities", out_keys="value",
        in_shapes=self.perception_dict["probabilities"].out_shapes(),
        func=_to_scalar)

    # initialize the categorical head
    self.perception_dict["probabilities"].apply(make_module_init_normc(std=0.01))

    # compile inference model returning both outputs
    self.net = InferenceBlock(
        in_keys=list(obs_shapes.keys()),
        out_keys=["probabilities", "value"],
        in_shapes=list(obs_shapes.values()),
        perception_blocks=self.perception_dict)
def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
             hidden_units: List[int], head_units: List[int], non_lin=nn.Module):
    """Add a dense 'head' block and one linear output block per action to the base network.

    The resulting inference block returns all action outputs plus 'latent'.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param hidden_units: A list of units per hidden layer (passed on to the base class).
    :param head_units: A list of units per layer of the 'head' block.
    :param non_lin: The nonlinear activation to be used (passed on to the base class).
    """
    super().__init__(obs_shapes, hidden_units, non_lin)

    blocks = self.perception_dict

    # perception part; uses the non_lin attribute of the instance rather than the argument
    blocks["head"] = DenseBlock(
        in_keys="latent", out_keys="head",
        in_shapes=blocks["latent"].out_shapes(),
        hidden_units=head_units, non_lin=self.non_lin)
    blocks['head'].apply(make_module_init_normc(std=1.0))

    # action heads: the number of output units is the last entry of each logits shape
    for action, shape in action_logits_shapes.items():
        blocks[action] = LinearOutputBlock(
            in_keys="head", out_keys=action,
            in_shapes=blocks["head"].out_shapes(),
            output_units=shape[-1])
        blocks[action].apply(make_module_init_normc(std=0.01))

    # compile inference model
    net_out_keys = list(action_logits_shapes.keys()) + ['latent']
    self.net = InferenceBlock(
        in_keys=list(obs_shapes.keys()),
        out_keys=net_out_keys,
        in_shapes=list(obs_shapes.values()),
        perception_blocks=self.perception_dict)
def __init__(self, obs_shapes: Dict[str, Sequence[int]], head_units: List[int], non_lin: nn.Module):
    """Build a value network that starts from an input named 'latent'.

    :param obs_shapes: The shapes of all inputs as a dict; the entry 'latent' feeds the head and
        all keys become inputs of the inference block.
    :param head_units: A list of units per layer of the 'head' block.
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__()

    blocks: Dict[str, PerceptionBlock] = dict()
    self.perception_dict = blocks

    # dense head on the 'latent' input
    head_block = DenseBlock(
        in_keys="latent", out_keys="head",
        in_shapes=obs_shapes["latent"],
        hidden_units=head_units, non_lin=non_lin)
    blocks["head"] = head_block

    # single-unit value output
    value_block = LinearOutputBlock(
        in_keys="head", out_keys="value",
        in_shapes=head_block.out_shapes(), output_units=1)
    blocks["value"] = value_block

    # initialize head and value block separately
    head_block.apply(make_module_init_normc(std=1.0))
    value_block.apply(make_module_init_normc(std=0.01))

    # compile inference model
    self.net = InferenceBlock(
        in_keys=list(obs_shapes.keys()),
        out_keys="value",
        in_shapes=list(obs_shapes.values()),
        perception_blocks=blocks)
class CuttingValueNet(nn.Module):
    """The Value net (critic) computing the predicted reward from the observations.

    :param obs_shapes: The shapes of all observations as a dict. The entries 'ordered_piece' and
        'selected_piece' are used as network inputs.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32

        self.perception_dict = OrderedDict()

        # one dense feature block per input observation
        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='ordered_piece', out_keys='order_feat',
            in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        self.perception_dict['selected_feat'] = DenseBlock(
            in_keys='selected_piece', out_keys='selected_feat',
            in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        # in_shapes lists exactly one shape per entry of in_keys
        self.perception_dict['latent'] = ConcatenationBlock(
            in_keys=['order_feat', 'selected_feat'], out_keys='latent',
            in_shapes=[[hidden_units], [hidden_units]], concat_dim=-1)

        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent', out_keys='value',
            in_shapes=self.perception_dict['latent'].out_shapes(), output_units=1)

        in_keys = ['ordered_piece', 'selected_piece']
        self.perception_net = InferenceBlock(
            in_keys=in_keys, out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights: whole net first, then the value block again with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network

        :param xx: Input dict
        :return: The computed output of the network
        """
        return self.perception_net(xx)
class DummyPolicyNet(DummyBaseNet):
    """Policy network with one dense net and linear output block per action head.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        for head_name in action_logits_shapes.keys():
            # layer sizes are derived from the first entry of the logits shape
            num_logits = action_logits_shapes[head_name][0]
            net_key = f'{head_name}_net'
            head_key = f'{head_name}'

            self.perception_dict[net_key] = DenseBlock(
                in_keys='hidden_out',
                in_shapes=self.perception_dict['hidden_out'].out_shapes(),
                out_keys=net_key,
                hidden_units=[num_logits * 5, num_logits * 2],
                non_lin=non_lin)

            self.perception_dict[head_key] = LinearOutputBlock(
                in_keys=net_key,
                in_shapes=self.perception_dict[net_key].out_shapes(),
                out_keys=head_name,
                output_units=num_logits)

        # Set up inference block
        obs_keys = list(self.obs_shapes.keys())
        self.perception_net = InferenceBlock(
            in_keys=obs_keys,
            out_keys=list(action_logits_shapes.keys()),
            in_shapes=[self.obs_shapes[key] for key in obs_keys],
            perception_blocks=self.perception_dict)

        # initialize the whole net first, then every action head output block again with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        for head_name in action_logits_shapes.keys():
            self.perception_dict[f'{head_name}'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        out_dict = self.perception_net(xx)
        return out_dict
class CustomComplexCriticNet(nn.Module, CustomComplexLatentNet):
    """Critic network adding a single 'value' output on top of the latent blocks of the base class.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)],
                 hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin, hidden_units)

        # value output block fed by the latent block
        latent_shapes = self.perception_dict['latent'].out_shapes()
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent', out_keys='value', in_shapes=latent_shapes, output_units=1)

        # inference block over all observations
        obs_keys = list(self.obs_shapes.keys())
        obs_key_shapes = [self.obs_shapes[key] for key in obs_keys]
        self.perception_net = InferenceBlock(
            in_keys=obs_keys, out_keys='value', in_shapes=obs_key_shapes,
            perception_blocks=self.perception_dict)

        # weight init: whole net with 1.0, value block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(self, in_tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        out_tensor_dict = self.perception_net(in_tensor_dict)
        return out_tensor_dict
class CustomCartpolePolicyNet(nn.Module):
    """Feed forward policy network mapping 'observation' to an 'action' output.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
        super().__init__()

        # Maze relies on dictionaries to represent the inference graph
        blocks = OrderedDict()
        self.perception_dict = blocks

        # latent embedding
        blocks['latent'] = DenseBlock(
            in_keys='observation', out_keys='latent',
            in_shapes=obs_shapes['observation'],
            hidden_units=hidden_units, non_lin=non_lin)

        # action head: one output unit per element of the logits shape
        num_logits = int(np.prod(action_logits_shapes["action"]))
        blocks['action'] = LinearOutputBlock(
            in_keys='latent', out_keys='action',
            in_shapes=blocks['latent'].out_shapes(),
            output_units=num_logits)

        # inference block
        self.perception_net = InferenceBlock(
            in_keys='observation', out_keys='action',
            in_shapes=obs_shapes['observation'],
            perception_blocks=blocks)

        # weight init: whole net with 1.0, action block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        blocks['action'].apply(make_module_init_normc(0.01))

    def forward(self, in_tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        out_tensor_dict = self.perception_net(in_tensor_dict)
        return out_tensor_dict
class DummyValueNet(DummyBaseNet):
    """Value network adding a dense value head and a single-unit 'value' output to the base net.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        blocks = self.perception_dict

        blocks['value_head_net'] = DenseBlock(
            in_keys='hidden_out',
            in_shapes=blocks['hidden_out'].out_shapes(),
            out_keys='value_head_net',
            hidden_units=[5, 2],
            non_lin=non_lin)

        blocks['value'] = LinearOutputBlock(
            in_keys='value_head_net',
            in_shapes=blocks['value_head_net'].out_shapes(),
            out_keys='value',
            output_units=1)

        # Set up inference block over all observations
        obs_keys = list(self.obs_shapes.keys())
        self.perception_net = InferenceBlock(
            in_keys=obs_keys,
            out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in obs_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights: whole net with 1.0, value block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        out_dict = self.perception_net(xx)
        return out_dict
def template_policy_net(self, observation_space: spaces.Dict, action_space: spaces.Dict,
                        shared_embedding_keys: List[str]) -> Tuple[InferenceBlock, InferenceBlock]:
    """Compiles a template policy network.

    :param observation_space: The input observations for the perception network.
    :param action_space: The action space that defines the network action heads.
    :param shared_embedding_keys: The list of embedding keys for this substep's model.
    :return: A policy network (actor) InferenceBlock, as well as the embedding net InferenceBlock if
        shared keys (other than the network inputs) have been specified, otherwise None.
    """
    # build perception net
    embedding_net = self.template_perception_net(observation_space)

    # build action heads on top of the 'latent' block of the perception net
    perception_dict = embedding_net.perception_dict
    action_heads = []
    # only the head names are needed; iterating the keys avoids rebinding the action_space argument
    for action_head in action_space.spaces.keys():
        # initialize action head
        action_net = LinearOutputBlock(
            in_keys="latent", out_keys=action_head,
            in_shapes=perception_dict["latent"].out_shapes(),
            output_units=self._distribution_mapper.required_logits_shape(action_head)[0])

        module_init = make_module_init_normc(std=0.01)
        action_net.apply(module_init)

        # extend perception dictionary
        perception_dict[action_head] = action_net
        action_heads.append(action_head)

    # shared embedding keys that are network inputs are not requested as outputs
    shared_embedding_keys_remove_input = [key for key in shared_embedding_keys
                                          if key not in embedding_net.in_keys]

    # compile inference model
    net = InferenceBlock(
        in_keys=embedding_net.in_keys,
        out_keys=action_heads + shared_embedding_keys_remove_input,
        in_shapes=embedding_net.in_shapes,
        perception_blocks=perception_dict)

    # without shared (non-input) embedding keys no embedding net is returned
    if not shared_embedding_keys_remove_input:
        embedding_net = None

    return net, embedding_net
def test_inference_block():
    """ perception test """
    in_dict, perception_dict = build_perception_dict()

    # compile inference block and predict everything at once
    net = InferenceBlock(
        in_keys=["in_key_0", "in_key_1"],
        out_keys="concat",
        in_shapes=[(1, 16), (1, 8)],
        perception_blocks=perception_dict)
    out_dict = net(in_dict)

    # the output is three dimensional with 64 features; out_shapes() omits the first dimension
    concat_out = out_dict["concat"]
    assert concat_out.ndim == 3
    assert concat_out.shape[-1] == 64
    assert net.out_shapes() == [concat_out.shape[1:]]

    # the graph drawing part is skipped when an ImportError is raised (e.g. pygraphviz is missing)
    try:
        import pygraphviz

        # draw inference graph
        graph = InferenceGraph(inference_block=net)
        graph.show(name='my_test_net', block_execution=False)
        graph.save(name='my_test_net', save_path='.')
        saved_files = glob.glob('*my_test_net*')
        assert len(saved_files) == 2
    except ImportError:
        pass
def __init__(self, obs_shapes: Dict[str, Sequence[int]], hidden_units: List[int], non_lin: nn.Module):
    """Add a single-unit 'value' output block to the base network and compile the inference block.

    :param obs_shapes: The shapes of all observations as a dict.
    :param hidden_units: A list of units per hidden layer (passed on to the base class).
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__(obs_shapes, hidden_units, non_lin)

    # value output fed by the 'latent' block
    value_block = LinearOutputBlock(
        in_keys="latent", out_keys="value",
        in_shapes=self.perception_dict["latent"].out_shapes(),
        output_units=1)
    self.perception_dict["value"] = value_block

    # initialize the value block
    value_block.apply(make_module_init_normc(std=0.01))

    # compile inference model
    self.net = InferenceBlock(
        in_keys=list(obs_shapes.keys()),
        out_keys="value",
        in_shapes=list(obs_shapes.values()),
        perception_blocks=self.perception_dict)
def __init__(self, obs_shapes, non_lin=nn.Tanh):
    """Build a value network mapping 'observation' to a single 'value' output.

    :param obs_shapes: The shapes of all observations as a dict; the entry 'observation' is used.
    :param non_lin: The nonlinear activation to be used.
    """
    super().__init__()

    # build perception part
    self.perception_network = DenseBlock(
        in_keys="observation", out_keys="latent",
        in_shapes=obs_shapes['observation'],
        hidden_units=[32, 32], non_lin=non_lin)

    module_init = make_module_init_normc(std=1.0)
    self.perception_network.apply(module_init)

    # build value head
    self.value_head = LinearOutputBlock(
        in_keys="latent", out_keys="value",
        in_shapes=self.perception_network.out_shapes(),
        output_units=1)

    module_init = make_module_init_normc(std=0.01)
    self.value_head.apply(module_init)

    # compile inference model; in_shapes holds exactly the shape of the single in_key
    # 'observation', independent of further entries in obs_shapes
    self.net = InferenceBlock(
        in_keys="observation", out_keys="value",
        in_shapes=[obs_shapes['observation']],
        perception_blocks={"latent": self.perception_network,
                           "value": self.value_head})
class CartPoleStateValueNet(nn.Module):
    """The Value net (critic) computing the discounted cumulative future reward from the observations.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)

        # the observations fed into the network, in concatenation order
        obs_keys = ['cart_position', 'cart_velocity', 'pole_angle', 'pole_angular_velocity']

        # initialize the perception dictionary
        blocks = OrderedDict()
        self.perception_dict = blocks

        # concatenate all observations along the last dimension
        blocks['concat'] = ConcatenationBlock(
            in_keys=list(obs_keys), out_keys='concat',
            in_shapes=[obs_shapes[key] for key in obs_keys],
            concat_dim=-1)

        # process concatenated representation with two dense layers
        blocks['embedding'] = DenseBlock(
            in_keys='concat', in_shapes=blocks['concat'].out_shapes(),
            hidden_units=[128, 128], non_lin=non_lin, out_keys='embedding')

        # add a linear output block
        blocks['value'] = LinearOutputBlock(
            in_keys='embedding', out_keys='value',
            in_shapes=blocks['embedding'].out_shapes(), output_units=1)

        # compile an inference block
        self.perception_net = InferenceBlock(
            in_keys=list(obs_keys), out_keys='value',
            in_shapes=[obs_shapes[key] for key in obs_keys],
            perception_blocks=blocks)

        # initialize model weights: whole net with 1.0, value block afterwards with 0.01
        self.perception_net.apply(make_module_init_normc(1.0))
        blocks['value'].apply(make_module_init_normc(0.01))

    def forward(self, tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param tensor_dict: The input tensor dictionary.
        :return: The computed output of the network.
        """
        out_tensor_dict = self.perception_net(tensor_dict)
        return out_tensor_dict
def template_q_value_net(
        self,
        observation_space: Optional[spaces.Dict],
        action_space: spaces.Dict,
        only_discrete_spaces: bool,
        perception_net: Optional[InferenceBlock] = None) -> InferenceBlock:
    """Compile a template state-action (Q) value network.

    :param observation_space: The input observations for the perception network.
    :param action_space: The action space that defines the network action heads.
    :param only_discrete_spaces: Flag specifying if the action space w.r.t. the step
        only holds discrete action spaces.
    :param perception_net: An initial network to continue from (e.g. useful for
        shared weights). Model building continues from the key 'latent'.
    :return: A q value network (critic) InferenceBlock.
    """
    assert all(isinstance(space, (spaces.Discrete, spaces.Box))
               for space in action_space.spaces.values()), \
        'Only discrete and box spaces supported thus far for q values ' \
        'critic.'

    if only_discrete_spaces:
        # one head per action, with as many outputs as the action has choices
        value_heads = {
            f'{act_key}_q_values': act_space.n
            for act_key, act_space in action_space.spaces.items()
        }
    else:
        # discrete sub-spaces are replaced by their one-hot processed space
        has_discrete = any(
            isinstance(sub_space, spaces.Discrete)
            for sub_space in action_space.spaces.values())
        if has_discrete:
            converted = {}
            for key, sub_space in action_space.spaces.items():
                if isinstance(sub_space, spaces.Discrete):
                    converted[key] = OneHotPreProcessor(
                        sub_space).processed_space()
                else:
                    converted[key] = sub_space
            action_space = spaces.Dict(converted)

        # the actions are added to the observations (action entries win on key clashes)
        # NOTE(review): observation_space is annotated Optional but is dereferenced
        # here unconditionally - confirm callers never pass None on this path.
        observation_space = spaces.Dict({
            **observation_space.spaces,
            **action_space.spaces
        })
        value_heads = {'q_value': 1}

    # check if actions are considered as observations for the state-action critic
    known_modalities = self.model_builder.observation_modality_mapping
    for action_head in action_space.spaces:
        if action_head in known_modalities:
            continue
        BColors.print_colored(
            f'TemplateModelComposer: The action \'{action_head}\' could not be found in the '
            f'model_builder.observation_modality_mapping and wont be considered '
            f'as an input to the state-action critic!', BColors.FAIL)

    # build the perception net unless one was handed in
    if perception_net is None:
        perception_net = self.template_perception_net(observation_space)

    # the value heads are added to the perception net's own block dictionary
    perception_dict = perception_net.perception_dict
    for head_key, num_outputs in value_heads.items():
        # create and initialize the value head
        head_block = LinearOutputBlock(
            in_keys="latent",
            out_keys=head_key,
            in_shapes=perception_dict["latent"].out_shapes(),
            output_units=num_outputs)
        head_block.apply(make_module_init_normc(std=0.01))

        # extend the perception dictionary
        perception_dict[head_key] = head_block

    # compile inference model
    return InferenceBlock(in_keys=perception_net.in_keys,
                          out_keys=list(value_heads),
                          in_shapes=perception_net.in_shapes,
                          perception_blocks=perception_dict)
class CuttingPolicyNet(nn.Module):
    """Policy network (actor) mapping the observations to action logits.

    Features of the selected and the ordered piece are extracted separately,
    concatenated, and fed into two linear heads ('cut_rotation' and 'cut_order').

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking or not.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        feat_units = 32

        blocks = OrderedDict()
        self.perception_dict = blocks

        # separate dense feature extractors for both pieces
        blocks['selected_feat'] = DenseBlock(
            in_keys='selected_piece',
            out_keys='selected_feat',
            in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[feat_units],
            non_lin=non_lin)
        blocks['order_feat'] = DenseBlock(
            in_keys='ordered_piece',
            out_keys='order_feat',
            in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[feat_units],
            non_lin=non_lin)

        # joint latent representation
        blocks['latent'] = ConcatenationBlock(
            in_keys=['selected_feat', 'order_feat'],
            out_keys='latent',
            in_shapes=[[feat_units], [feat_units]],
            concat_dim=-1)

        # rotation head: with masking, the linear block only emits the logits
        # and the masking block produces the final 'cut_rotation' output
        if with_mask:
            rotation_out_key = 'cut_rotation_logits'
        else:
            rotation_out_key = 'cut_rotation'
        blocks[rotation_out_key] = LinearOutputBlock(
            in_keys='latent',
            out_keys=rotation_out_key,
            in_shapes=blocks['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_rotation'][0])

        if with_mask:
            masking_in_shapes = blocks['cut_rotation_logits'].out_shapes() \
                + [self.obs_shapes['cutting_mask']]
            blocks['cut_rotation'] = ActionMaskingBlock(
                in_keys=['cut_rotation_logits', 'cutting_mask'],
                out_keys='cut_rotation',
                in_shapes=masking_in_shapes,
                num_actors=1,
                num_of_actor_actions=None)

        # order head
        blocks['cut_order'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='cut_order',
            in_shapes=blocks['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_order'][0])

        # the mask is an additional network input when masking is enabled
        in_keys = ['selected_piece', 'ordered_piece']
        if with_mask:
            in_keys.append('cutting_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys=['cut_rotation', 'cut_order'],
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=blocks)

        # weight initialization: whole net first, then both linear heads
        # are re-initialized with a smaller std
        self.perception_net.apply(make_module_init_normc(1.0))
        blocks[rotation_out_key].apply(make_module_init_normc(0.01))
        blocks['cut_order'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Run the input through the compiled inference block.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
def __init__(self, obs_shapes: Dict[str, Sequence[int]],
             action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: Union[str, type(nn.Module)], with_mask: bool):
    """Build the selection policy network.

    Inventory and ordered piece are embedded separately; a correlation block
    on both embeddings yields the 'piece_idx' output (optionally masked).

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking or not.
    """
    nn.Module.__init__(self)
    self.obs_shapes = obs_shapes

    hidden_units = 32
    embedding_dim = 7

    blocks = OrderedDict()
    self.perception_dict = blocks

    # --- inventory embedding ---
    blocks['inventory_feat'] = DenseBlock(
        in_keys='inventory',
        out_keys='inventory_feat',
        in_shapes=self.obs_shapes['inventory'],
        hidden_units=[hidden_units],
        non_lin=non_lin)
    blocks['inventory_embed'] = LinearOutputBlock(
        in_keys='inventory_feat',
        out_keys='inventory_embed',
        in_shapes=blocks['inventory_feat'].out_shapes(),
        output_units=embedding_dim)

    # --- ordered piece embedding ---
    # an extra dimension is inserted at position -2 before embedding
    blocks['order_unsqueezed'] = FunctionalBlock(
        in_keys='ordered_piece',
        out_keys='order_unsqueezed',
        in_shapes=self.obs_shapes['ordered_piece'],
        func=lambda x: torch.unsqueeze(x, dim=-2))
    blocks['order_feat'] = DenseBlock(
        in_keys='order_unsqueezed',
        out_keys='order_feat',
        in_shapes=blocks['order_unsqueezed'].out_shapes(),
        hidden_units=[hidden_units],
        non_lin=non_lin)
    blocks['order_embed'] = LinearOutputBlock(
        in_keys='order_feat',
        out_keys='order_embed',
        in_shapes=blocks['order_feat'].out_shapes(),
        output_units=embedding_dim)

    # --- correlation score between both embeddings ---
    corr_in_shapes = blocks['inventory_embed'].out_shapes()
    corr_in_shapes.extend(blocks['order_embed'].out_shapes())
    # without masking the correlation block directly produces 'piece_idx'
    if with_mask:
        out_key = 'corr_score'
    else:
        out_key = 'piece_idx'
    blocks[out_key] = CorrelationBlock(
        in_keys=['inventory_embed', 'order_embed'],
        out_keys=out_key,
        in_shapes=corr_in_shapes,
        reduce=True)

    # --- optional action masking ---
    if with_mask:
        masking_in_shapes = blocks['corr_score'].out_shapes() \
            + [self.obs_shapes['inventory_mask']]
        blocks['piece_idx'] = ActionMaskingBlock(
            in_keys=['corr_score', 'inventory_mask'],
            out_keys='piece_idx',
            in_shapes=masking_in_shapes,
            num_actors=1,
            num_of_actor_actions=None)

    # the produced output has to match the expected number of logits
    produced_logits = blocks['piece_idx'].out_shapes()[0][0]
    assert produced_logits == action_logits_shapes['piece_idx'][0]

    # the mask is an additional network input when masking is enabled
    in_keys = ['ordered_piece', 'inventory']
    if with_mask:
        in_keys.append('inventory_mask')
    self.perception_net = InferenceBlock(
        in_keys=in_keys,
        out_keys='piece_idx',
        in_shapes=[self.obs_shapes[key] for key in in_keys],
        perception_blocks=blocks)

    # weight initialization: whole net first, then both embedding blocks
    # are re-initialized with a smaller std
    self.perception_net.apply(make_module_init_normc(1.0))
    blocks['inventory_embed'].apply(make_module_init_normc(0.01))
    blocks['order_embed'].apply(make_module_init_normc(0.01))
class SelectionPolicyNet(nn.Module):
    """Selection policy network for cutting 2d.

    Embeds the inventory and the ordered piece and scores them against each
    other with a correlation block to obtain the 'piece_idx' output.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking or not.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32
        embedding_dim = 7

        self.perception_dict = OrderedDict()
        pd = self.perception_dict

        # inventory: dense features followed by a linear embedding
        pd['inventory_feat'] = DenseBlock(
            in_keys='inventory',
            out_keys='inventory_feat',
            in_shapes=self.obs_shapes['inventory'],
            hidden_units=[hidden_units],
            non_lin=non_lin)
        inventory_feat_shapes = pd['inventory_feat'].out_shapes()
        pd['inventory_embed'] = LinearOutputBlock(
            in_keys='inventory_feat',
            out_keys='inventory_embed',
            in_shapes=inventory_feat_shapes,
            output_units=embedding_dim)

        # ordered piece: insert a dimension at -2, then dense features
        # followed by a linear embedding
        pd['order_unsqueezed'] = FunctionalBlock(
            in_keys='ordered_piece',
            out_keys='order_unsqueezed',
            in_shapes=self.obs_shapes['ordered_piece'],
            func=lambda x: torch.unsqueeze(x, dim=-2))
        unsqueezed_shapes = pd['order_unsqueezed'].out_shapes()
        pd['order_feat'] = DenseBlock(
            in_keys='order_unsqueezed',
            out_keys='order_feat',
            in_shapes=unsqueezed_shapes,
            hidden_units=[hidden_units],
            non_lin=non_lin)
        order_feat_shapes = pd['order_feat'].out_shapes()
        pd['order_embed'] = LinearOutputBlock(
            in_keys='order_feat',
            out_keys='order_embed',
            in_shapes=order_feat_shapes,
            output_units=embedding_dim)

        # correlation score of both embeddings; it is the final output
        # unless masking is applied afterwards
        score_in_shapes = pd['inventory_embed'].out_shapes()
        score_in_shapes.extend(pd['order_embed'].out_shapes())
        out_key = 'piece_idx'
        if with_mask:
            out_key = 'corr_score'
        pd[out_key] = CorrelationBlock(
            in_keys=['inventory_embed', 'order_embed'],
            out_keys=out_key,
            in_shapes=score_in_shapes,
            reduce=True)

        # action masking on top of the correlation score
        if with_mask:
            pd['piece_idx'] = ActionMaskingBlock(
                in_keys=['corr_score', 'inventory_mask'],
                out_keys='piece_idx',
                in_shapes=pd['corr_score'].out_shapes()
                + [self.obs_shapes['inventory_mask']],
                num_actors=1,
                num_of_actor_actions=None)

        # the produced output has to match the expected number of logits
        piece_idx_shapes = pd['piece_idx'].out_shapes()
        assert piece_idx_shapes[0][0] == action_logits_shapes['piece_idx'][0]

        # the mask is an additional network input when masking is enabled
        in_keys = ['ordered_piece', 'inventory']
        if with_mask:
            in_keys.append('inventory_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='piece_idx',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=pd)

        # weight initialization: whole net first, then both embedding blocks
        # are re-initialized with a smaller std
        self.perception_net.apply(make_module_init_normc(1.0))
        for embed_key in ('inventory_embed', 'order_embed'):
            pd[embed_key].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Run the input through the compiled inference block.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
def __init__(self, obs_shapes: Dict[str, Sequence[int]],
             action_logits_shapes: Dict[str, Sequence[int]],
             non_lin: Union[str, type(nn.Module)], with_mask: bool):
    """Build the cutting policy network.

    Dense features of the selected and the ordered piece are concatenated
    into a latent representation that feeds the 'cut_rotation' and
    'cut_order' heads.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking or not.
    """
    nn.Module.__init__(self)
    self.obs_shapes = obs_shapes

    hidden_units = 32

    self.perception_dict = OrderedDict()
    pd = self.perception_dict

    # one dense feature extractor per piece observation (insertion order matters)
    feature_specs = (('selected_piece', 'selected_feat'),
                     ('ordered_piece', 'order_feat'))
    for obs_key, feat_key in feature_specs:
        pd[feat_key] = DenseBlock(
            in_keys=obs_key,
            out_keys=feat_key,
            in_shapes=self.obs_shapes[obs_key],
            hidden_units=[hidden_units],
            non_lin=non_lin)

    # joint latent representation
    pd['latent'] = ConcatenationBlock(
        in_keys=['selected_feat', 'order_feat'],
        out_keys='latent',
        in_shapes=[[hidden_units], [hidden_units]],
        concat_dim=-1)

    # rotation head; with masking it only yields the logits to be masked
    rotation_out_key = 'cut_rotation'
    if with_mask:
        rotation_out_key = 'cut_rotation_logits'
    pd[rotation_out_key] = LinearOutputBlock(
        in_keys='latent',
        out_keys=rotation_out_key,
        in_shapes=pd['latent'].out_shapes(),
        output_units=action_logits_shapes['cut_rotation'][0])

    if with_mask:
        logits_shapes = pd['cut_rotation_logits'].out_shapes()
        pd['cut_rotation'] = ActionMaskingBlock(
            in_keys=['cut_rotation_logits', 'cutting_mask'],
            out_keys='cut_rotation',
            in_shapes=logits_shapes + [self.obs_shapes['cutting_mask']],
            num_actors=1,
            num_of_actor_actions=None)

    # order head
    pd['cut_order'] = LinearOutputBlock(
        in_keys='latent',
        out_keys='cut_order',
        in_shapes=pd['latent'].out_shapes(),
        output_units=action_logits_shapes['cut_order'][0])

    # the mask is an additional network input when masking is enabled
    in_keys = ['selected_piece', 'ordered_piece']
    if with_mask:
        in_keys.append('cutting_mask')
    self.perception_net = InferenceBlock(
        in_keys=in_keys,
        out_keys=['cut_rotation', 'cut_order'],
        in_shapes=[self.obs_shapes[key] for key in in_keys],
        perception_blocks=pd)

    # weight initialization: whole net first, then both linear heads
    # are re-initialized with a smaller std
    self.perception_net.apply(make_module_init_normc(1.0))
    for head_key in (rotation_out_key, 'cut_order'):
        pd[head_key].apply(make_module_init_normc(0.01))