Example #1
    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32

        self.perception_dict = OrderedDict()

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='ordered_piece', out_keys='order_feat', in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        self.perception_dict['selected_feat'] = DenseBlock(
            in_keys='selected_piece', out_keys='selected_feat', in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        self.perception_dict['latent'] = ConcatenationBlock(
            in_keys=['order_feat', 'selected_feat'], out_keys='latent',
            in_shapes=[[hidden_units], [hidden_units]], concat_dim=-1)

        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent', out_keys='value', in_shapes=self.perception_dict['latent'].out_shapes(), output_units=1)

        in_keys = ['ordered_piece', 'selected_piece']
        self.perception_net = InferenceBlock(
            in_keys=in_keys, out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))
Example #2
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)]):
        super().__init__()
        self.obs_shapes = obs_shapes
        action_key = list(action_logits_shapes.keys())[0]
        # build perception part
        self.perception_dict = OrderedDict()
        self.perception_dict['embedding'] = DenseBlock(
            in_keys="observation",
            out_keys="embedding",
            in_shapes=obs_shapes['observation'],
            hidden_units=[256, 256],
            non_lin=non_lin)

        # build action head
        self.perception_dict[action_key] = LinearOutputBlock(
            in_keys="embedding",
            out_keys=action_key,
            in_shapes=self.perception_dict['embedding'].out_shapes(),
            output_units=action_logits_shapes[action_key][0])

        self.perception_net = InferenceBlock(
            in_keys='observation',
            out_keys=action_key,
            in_shapes=[self.obs_shapes['observation']],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict[action_key].apply(make_module_init_normc(0.01))
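A minimal usage sketch for the constructor above. The enclosing class name (`FlatPolicyNet`) and the concrete shapes are assumptions for illustration; since an `InferenceBlock` is itself an `nn.Module` that maps a tensor dict to a tensor dict, the network can be exercised through `perception_net` directly:

import torch
import torch.nn as nn

# Assumed: `FlatPolicyNet` is the class whose __init__ is shown above,
# and the observation/action sizes below are illustrative only.
obs_shapes = {'observation': (16,)}
action_logits_shapes = {'action': (4,)}
policy = FlatPolicyNet(obs_shapes, action_logits_shapes, non_lin=nn.ReLU)

# InferenceBlock consumes and produces dictionaries of tensors.
obs = {'observation': torch.randn(8, 16)}        # batch of 8 observations
logits = policy.perception_net(obs)['action']    # -> shape (8, 4)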
Example #3
    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        for action_head_name in action_logits_shapes.keys():
            head_hidden_units = [lambda out_shape: out_shape[0] * 5,
                                 lambda out_shape: out_shape[0] * 2,
                                 lambda out_shape: out_shape[0]]
            head_hidden_units = [func(action_logits_shapes[action_head_name]) for func in head_hidden_units]

            self.perception_dict[f'{action_head_name}_net'] = DenseBlock(
                in_keys='hidden_out', in_shapes=self.perception_dict['hidden_out'].out_shapes(),
                out_keys=f'{action_head_name}_net', hidden_units=head_hidden_units[:-1], non_lin=non_lin)

            self.perception_dict[f'{action_head_name}'] = LinearOutputBlock(
                in_keys=f'{action_head_name}_net',
                in_shapes=self.perception_dict[f'{action_head_name}_net'].out_shapes(),
                out_keys=action_head_name, output_units=head_hidden_units[-1]
            )

        # Set up inference block
        self.perception_net = InferenceBlock(
            in_keys=list(self.obs_shapes.keys()), out_keys=list(action_logits_shapes.keys()),
            in_shapes=[self.obs_shapes[key] for key in self.obs_shapes.keys()],
            perception_blocks=self.perception_dict)

        self.perception_net.apply(make_module_init_normc(1.0))
        for action_head_name in action_logits_shapes.keys():
            self.perception_dict[f'{action_head_name}'].apply(make_module_init_normc(0.01))
Example #4
class QCriticNetContinuous(nn.Module):
    """Simple Q critic for mixed action heads (that is not all discrete). As such it computes a single q_value output
    for all observations.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_spaces_dict: Dict[Union[str, int], spaces.Space],
                 non_lin: Union[str, type(nn.Module)]):
        super().__init__()
        self.obs_shapes = obs_shapes
        # build perception part
        self.perception_dict = OrderedDict()
        self.perception_dict['latent-obs'] = DenseBlock(
            in_keys="observation",
            out_keys="latent-obs",
            in_shapes=obs_shapes['observation'],
            hidden_units=[256],
            non_lin=non_lin)
        self.perception_dict['latent-act'] = DenseBlock(
            in_keys="action",
            out_keys="latent-act",
            in_shapes=obs_shapes['action'],
            hidden_units=[256],
            non_lin=non_lin)

        self.perception_dict['concat'] = ConcatenationBlock(
            in_keys=['latent-obs', 'latent-act'],
            in_shapes=self.perception_dict['latent-obs'].out_shapes() +
            self.perception_dict['latent-act'].out_shapes(),
            concat_dim=-1,
            out_keys='concat')

        self.perception_dict['latent'] = DenseBlock(
            in_keys="concat",
            out_keys="latent",
            in_shapes=self.perception_dict['concat'].out_shapes(),
            hidden_units=[256],
            non_lin=non_lin)

        # build action head
        self.perception_dict['q_value'] = LinearOutputBlock(
            in_keys="latent",
            out_keys="q_value",
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=1)

        self.perception_net = InferenceBlock(
            in_keys=['observation', 'action'],
            out_keys='q_value',
            in_shapes=[
                self.obs_shapes['observation'], self.obs_shapes['action']
            ],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['q_value'].apply(make_module_init_normc(0.01))

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network."""
        return self.perception_net(x)
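A hedged usage sketch for `QCriticNetContinuous` (the observation and action sizes are illustrative assumptions; note that the action shape is read from `obs_shapes`, since the action is treated as an additional network input):

import torch
import torch.nn as nn

obs_shapes = {'observation': (10,), 'action': (3,)}   # assumed sizes
critic = QCriticNetContinuous(obs_shapes, action_spaces_dict={}, non_lin=nn.ReLU)

# both inputs are embedded to 256 units, concatenated and reduced to one q-value
batch = {'observation': torch.randn(32, 10), 'action': torch.randn(32, 3)}
q_values = critic(batch)['q_value']                   # -> shape (32, 1)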
Example #5
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        self.perception_dict['value_head_net'] = DenseBlock(
            in_keys='hidden_out',
            in_shapes=self.perception_dict['hidden_out'].out_shapes(),
            out_keys='value_head_net',
            hidden_units=[5, 2],
            non_lin=non_lin)

        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='value_head_net',
            in_shapes=self.perception_dict['value_head_net'].out_shapes(),
            out_keys='value',
            output_units=1)

        # Set up inference block
        self.perception_net = InferenceBlock(
            in_keys=list(self.obs_shapes.keys()),
            out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in self.obs_shapes.keys()],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))
Example #6
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str,
                                type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin,
                                        hidden_units)

        # build action heads
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='value',
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=1)

        # build inference block
        in_keys = list(self.obs_shapes.keys())
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))
Example #7
    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
        super().__init__()

        # Maze relies on dictionaries to represent the inference graph
        self.perception_dict = OrderedDict()

        # build latent embedding block
        self.perception_dict['latent'] = DenseBlock(
            in_keys='observation', out_keys='latent', in_shapes=obs_shapes['observation'],
            hidden_units=hidden_units, non_lin=non_lin)

        # build action head
        self.perception_dict['action'] = LinearOutputBlock(
            in_keys='latent', out_keys='action', in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=int(np.prod(action_logits_shapes["action"])))

        # build inference block
        self.perception_net = InferenceBlock(
            in_keys='observation', out_keys='action', in_shapes=obs_shapes['observation'],
            perception_blocks=self.perception_dict)

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['action'].apply(make_module_init_normc(0.01))
Example #8
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str,
                                type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin,
                                        hidden_units)

        # build action heads
        for action_key, action_shape in action_logits_shapes.items():
            self.perception_dict[action_key] = LinearOutputBlock(
                in_keys='latent',
                out_keys=action_key,
                in_shapes=self.perception_dict['latent'].out_shapes(),
                output_units=int(np.prod(action_shape)))

        # build inference block
        in_keys = list(self.obs_shapes.keys())
        # Specifically add 'latent_screen' as an out_key to the network, so it will get returned when calling the
        #   forward method and can be reused by the critic network.
        out_keys = list(action_logits_shapes.keys()) + ['latent_screen']
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys=out_keys,
            perception_blocks=self.perception_dict,
            in_shapes=[self.obs_shapes[key] for key in in_keys])

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        for action_key in action_logits_shapes.keys():
            self.perception_dict[action_key].apply(
                make_module_init_normc(0.01))
Example #9
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str,
                                type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)

        # Maze relies on dictionaries to represent the inference graph
        self.perception_dict = OrderedDict()

        # build latent feature embedding block
        self.perception_dict['latent_inventory'] = DenseBlock(
            in_keys='observation_inventory',
            out_keys='latent_inventory',
            in_shapes=obs_shapes['observation_inventory'],
            hidden_units=[128],
            non_lin=non_lin)

        # Concatenate latent features
        self.perception_dict['latent_concat'] = ConcatenationBlock(
            in_keys=['latent_inventory', 'latent_screen'],
            out_keys='latent_concat',
            in_shapes=self.perception_dict['latent_inventory'].out_shapes() +
            [obs_shapes['latent_screen']],
            concat_dim=-1)

        # Add latent dense block
        self.perception_dict['latent_dense'] = DenseBlock(
            in_keys='latent_concat',
            out_keys='latent_dense',
            hidden_units=hidden_units,
            non_lin=non_lin,
            in_shapes=self.perception_dict['latent_concat'].out_shapes())

        # Add recurrent block
        self.perception_dict['latent'] = LSTMLastStepBlock(
            in_keys='latent_dense',
            out_keys='latent',
            in_shapes=self.perception_dict['latent_dense'].out_shapes(),
            hidden_size=32,
            num_layers=1,
            bidirectional=False,
            non_lin=non_lin)

        # build action heads
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='value',
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=1)

        # build inference block
        in_keys = list(obs_shapes.keys())
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='value',
            in_shapes=[obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))
Example #10
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)

        # initialize the perception dictionary
        self.perception_dict = OrderedDict()

        # concatenate all observations in dictionary
        self.perception_dict['concat'] = ConcatenationBlock(
            in_keys=[
                'cart_position', 'cart_velocity', 'pole_angle',
                'pole_angular_velocity'
            ],
            out_keys='concat',
            in_shapes=[
                obs_shapes['cart_position'], obs_shapes['cart_velocity'],
                obs_shapes['pole_angle'], obs_shapes['pole_angular_velocity']
            ],
            concat_dim=-1)

        # process concatenated representation with two dense layers
        self.perception_dict['embedding'] = DenseBlock(
            in_keys='concat',
            in_shapes=self.perception_dict['concat'].out_shapes(),
            hidden_units=[128, 128],
            non_lin=non_lin,
            out_keys='embedding')

        # add a linear output block
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='embedding',
            out_keys='value',
            in_shapes=self.perception_dict['embedding'].out_shapes(),
            output_units=1)

        # compile an inference block
        self.perception_net = InferenceBlock(
            in_keys=[
                'cart_position', 'cart_velocity', 'pole_angle',
                'pole_angular_velocity'
            ],
            out_keys='value',
            in_shapes=[
                obs_shapes[key] for key in [
                    'cart_position', 'cart_velocity', 'pole_angle',
                    'pole_angular_velocity'
                ]
            ],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))
Example #11
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_spaces_dict: Dict[Union[str, int], spaces.Space],
                 non_lin: Union[str, type(nn.Module)]):
        super().__init__()
        self.obs_shapes = obs_shapes
        # build perception part
        self.perception_dict = OrderedDict()
        self.perception_dict['latent-obs'] = DenseBlock(
            in_keys="observation",
            out_keys="latent-obs",
            in_shapes=obs_shapes['observation'],
            hidden_units=[256],
            non_lin=non_lin)
        self.perception_dict['latent-act'] = DenseBlock(
            in_keys="action",
            out_keys="latent-act",
            in_shapes=obs_shapes['action'],
            hidden_units=[256],
            non_lin=non_lin)

        self.perception_dict['concat'] = ConcatenationBlock(
            in_keys=['latent-obs', 'latent-act'],
            in_shapes=self.perception_dict['latent-obs'].out_shapes() +
            self.perception_dict['latent-act'].out_shapes(),
            concat_dim=-1,
            out_keys='concat')

        self.perception_dict['latent'] = DenseBlock(
            in_keys="concat",
            out_keys="latent",
            in_shapes=self.perception_dict['concat'].out_shapes(),
            hidden_units=[256],
            non_lin=non_lin)

        # build action head
        self.perception_dict['q_value'] = LinearOutputBlock(
            in_keys="latent",
            out_keys="q_value",
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=1)

        self.perception_net = InferenceBlock(
            in_keys=['observation', 'action'],
            out_keys='q_value',
            in_shapes=[
                self.obs_shapes['observation'], self.obs_shapes['action']
            ],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['q_value'].apply(make_module_init_normc(0.01))
Example #12
class CustomSharedComplexPolicyNet(nn.Module, CustomComplexLatentNet):
    """Simple feed forward policy network.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str,
                                type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin,
                                        hidden_units)

        # build action heads
        for action_key, action_shape in action_logits_shapes.items():
            self.perception_dict[action_key] = LinearOutputBlock(
                in_keys='latent',
                out_keys=action_key,
                in_shapes=self.perception_dict['latent'].out_shapes(),
                output_units=int(np.prod(action_shape)))

        # build inference block
        in_keys = list(self.obs_shapes.keys())
        # Specifically add 'latent_screen' as an out_key to the network, so it will get returned when calling the
        #   forward method and can be reused by the critic network.
        out_keys = list(action_logits_shapes.keys()) + ['latent_screen']
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys=out_keys,
            perception_blocks=self.perception_dict,
            in_shapes=[self.obs_shapes[key] for key in in_keys])

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        for action_key in action_logits_shapes.keys():
            self.perception_dict[action_key].apply(
                make_module_init_normc(0.01))

    def forward(
            self,
            in_tensor_dict: Dict[str,
                                 torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        return self.perception_net(in_tensor_dict)
Example #13
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 hidden_units: List[int], non_lin: nn.Module,
                 support_range: Tuple[int, int]):
        super().__init__(obs_shapes, hidden_units, non_lin)

        # build categorical value head
        support_set_size = support_range[1] - support_range[0] + 1
        self.perception_dict["probabilities"] = LinearOutputBlock(
            in_keys="latent",
            out_keys="probabilities",
            in_shapes=self.perception_dict["latent"].out_shapes(),
            output_units=support_set_size)

        # compute value as probability weighted sum of supports
        def _to_scalar(x: torch.Tensor) -> torch.Tensor:
            return support_to_scalar(x, support_range=support_range)

        self.perception_dict["value"] = FunctionalBlock(
            in_keys="probabilities",
            out_keys="value",
            in_shapes=self.perception_dict["probabilities"].out_shapes(),
            func=_to_scalar)

        module_init = make_module_init_normc(std=0.01)
        self.perception_dict["probabilities"].apply(module_init)

        # compile inference model
        self.net = InferenceBlock(in_keys=list(obs_shapes.keys()),
                                  out_keys=["probabilities", "value"],
                                  in_shapes=list(obs_shapes.values()),
                                  perception_blocks=self.perception_dict)
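For reference, a minimal sketch of the `support_to_scalar` reduction wrapped by the `FunctionalBlock` above, assuming the usual MuZero-style convention (softmax over an integer support set, then a probability-weighted sum); the actual Maze implementation may differ in detail:

import torch

def support_to_scalar(logits: torch.Tensor, support_range) -> torch.Tensor:
    low, high = support_range
    # integer support values, e.g. support_range=(-10, 10) -> [-10, ..., 10]
    support = torch.arange(low, high + 1, dtype=logits.dtype, device=logits.device)
    probs = torch.softmax(logits, dim=-1)  # categorical distribution over the support set
    return (probs * support).sum(dim=-1)   # expected value under that distribution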
Example #14
    def __init__(self,
                 obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 hidden_units: List[int],
                 head_units: List[int],
                 non_lin: nn.Module):
        super().__init__(obs_shapes, hidden_units, non_lin)

        # build perception part
        self.perception_dict["head"] = DenseBlock(
            in_keys="latent",
            out_keys="head",
            in_shapes=self.perception_dict["latent"].out_shapes(),
            hidden_units=head_units,
            non_lin=self.non_lin)

        self.perception_dict['head'].apply(make_module_init_normc(std=1.0))

        # build action head
        for action, shape in action_logits_shapes.items():
            self.perception_dict[action] = LinearOutputBlock(
                in_keys="head",
                out_keys=action,
                in_shapes=self.perception_dict["head"].out_shapes(),
                output_units=action_logits_shapes[action][-1])

            module_init = make_module_init_normc(std=0.01)
            self.perception_dict[action].apply(module_init)

        # compile inference model
        self.net = InferenceBlock(in_keys=list(obs_shapes.keys()),
                                  out_keys=list(action_logits_shapes.keys()) +
                                  ['latent'],
                                  in_shapes=list(obs_shapes.values()),
                                  perception_blocks=self.perception_dict)
Example #15
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 head_units: List[int], non_lin: nn.Module):
        super().__init__()

        self.perception_dict: Dict[str, PerceptionBlock] = dict()
        # build perception part
        self.perception_dict["head"] = DenseBlock(
            in_keys="latent",
            out_keys="head",
            in_shapes=obs_shapes["latent"],
            hidden_units=head_units,
            non_lin=non_lin)

        self.perception_dict["value"] = LinearOutputBlock(
            in_keys="head",
            out_keys="value",
            in_shapes=self.perception_dict["head"].out_shapes(),
            output_units=1)

        self.perception_dict['head'].apply(make_module_init_normc(std=1.0))
        self.perception_dict["value"].apply(make_module_init_normc(std=0.01))

        # compile inference model
        self.net = InferenceBlock(in_keys=list(obs_shapes.keys()),
                                  out_keys="value",
                                  in_shapes=list(obs_shapes.values()),
                                  perception_blocks=self.perception_dict)
Example #16
class CuttingValueNet(nn.Module):
    """The Value net (critic) computing the predicted reward from the observations.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32

        self.perception_dict = OrderedDict()

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='ordered_piece', out_keys='order_feat', in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        self.perception_dict['selected_feat'] = DenseBlock(
            in_keys='selected_piece', out_keys='selected_feat', in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[hidden_units], non_lin=non_lin)

        self.perception_dict['latent'] = ConcatenationBlock(
            in_keys=['order_feat', 'selected_feat'], out_keys='latent',
            in_shapes=[[hidden_units], [hidden_units]], concat_dim=-1)

        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent', out_keys='value', in_shapes=self.perception_dict['latent'].out_shapes(), output_units=1)

        in_keys = ['ordered_piece', 'selected_piece']
        self.perception_net = InferenceBlock(
            in_keys=in_keys, out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network

        :param xx: Input dict
        :return: The computed output of the network
        """
        return self.perception_net(xx)
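A hedged usage sketch for `CuttingValueNet`; the piece shapes are assumptions (in cutting-2d a piece is plausibly described by its two dimensions):

import torch
import torch.nn as nn

obs_shapes = {'ordered_piece': (2,), 'selected_piece': (2,)}  # assumed shapes
value_net = CuttingValueNet(obs_shapes, non_lin=nn.Tanh)

obs = {'ordered_piece': torch.randn(8, 2), 'selected_piece': torch.randn(8, 2)}
value = value_net(obs)['value']   # -> shape (8, 1)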
Example #17
class DummyPolicyNet(DummyBaseNet):
    """Policy network.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        for action_head_name in action_logits_shapes.keys():
            head_hidden_units = [lambda out_shape: out_shape[0] * 5,
                                 lambda out_shape: out_shape[0] * 2,
                                 lambda out_shape: out_shape[0]]
            head_hidden_units = [func(action_logits_shapes[action_head_name]) for func in head_hidden_units]

            self.perception_dict[f'{action_head_name}_net'] = DenseBlock(
                in_keys='hidden_out', in_shapes=self.perception_dict['hidden_out'].out_shapes(),
                out_keys=f'{action_head_name}_net', hidden_units=head_hidden_units[:-1], non_lin=non_lin)

            self.perception_dict[f'{action_head_name}'] = LinearOutputBlock(
                in_keys=f'{action_head_name}_net',
                in_shapes=self.perception_dict[f'{action_head_name}_net'].out_shapes(),
                out_keys=action_head_name, output_units=head_hidden_units[-1]
            )

        # Set up inference block
        self.perception_net = InferenceBlock(
            in_keys=list(self.obs_shapes.keys()), out_keys=list(action_logits_shapes.keys()),
            in_shapes=[self.obs_shapes[key] for key in self.obs_shapes.keys()],
            perception_blocks=self.perception_dict)

        self.perception_net.apply(make_module_init_normc(1.0))
        for action_head_name in action_logits_shapes.keys():
            self.perception_dict[f'{action_head_name}'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
Example #18
class CustomComplexCriticNet(nn.Module, CustomComplexLatentNet):
    """Simple feed forward policy network.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str,
                                type(nn.Module)], hidden_units: List[int]):
        nn.Module.__init__(self)
        CustomComplexLatentNet.__init__(self, obs_shapes, non_lin,
                                        hidden_units)

        # build action heads
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='value',
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=1)

        # build inference block
        in_keys = list(self.obs_shapes.keys())
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(
            self,
            in_tensor_dict: Dict[str,
                                 torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        return self.perception_net(in_tensor_dict)
Example #19
class CustomCartpolePolicyNet(nn.Module):
    """Simple feed forward policy network.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param hidden_units: A list of units per hidden layer.
    """

    def __init__(self, obs_shapes: Dict[str, Sequence[int]], action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], hidden_units: List[int]):
        super().__init__()

        # Maze relies on dictionaries to represent the inference graph
        self.perception_dict = OrderedDict()

        # build latent embedding block
        self.perception_dict['latent'] = DenseBlock(
            in_keys='observation', out_keys='latent', in_shapes=obs_shapes['observation'],
            hidden_units=hidden_units, non_lin=non_lin)

        # build action head
        self.perception_dict['action'] = LinearOutputBlock(
            in_keys='latent', out_keys='action', in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=int(np.prod(action_logits_shapes["action"])))

        # build inference block
        self.perception_net = InferenceBlock(
            in_keys='observation', out_keys='action', in_shapes=obs_shapes['observation'],
            perception_blocks=self.perception_dict)

        # apply weight init
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['action'].apply(make_module_init_normc(0.01))

    def forward(self, in_tensor_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param in_tensor_dict: Input tensor dict.
        :return: The computed output of the network.
        """
        return self.perception_net(in_tensor_dict)
Example #20
class DummyValueNet(DummyBaseNet):
    """Policy network.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: type(nn.Module)):
        super().__init__(obs_shapes, non_lin)

        self.perception_dict['value_head_net'] = DenseBlock(
            in_keys='hidden_out',
            in_shapes=self.perception_dict['hidden_out'].out_shapes(),
            out_keys='value_head_net',
            hidden_units=[5, 2],
            non_lin=non_lin)

        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='value_head_net',
            in_shapes=self.perception_dict['value_head_net'].out_shapes(),
            out_keys='value',
            output_units=1)

        # Set up inference block
        self.perception_net = InferenceBlock(
            in_keys=list(self.obs_shapes.keys()),
            out_keys='value',
            in_shapes=[self.obs_shapes[key] for key in self.obs_shapes.keys()],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
Example #21
    def template_policy_net(
        self, observation_space: spaces.Dict, action_space: spaces.Dict,
        shared_embedding_keys: List[str]
    ) -> Tuple[InferenceBlock, InferenceBlock]:
        """Compiles a template policy network.

        :param observation_space: The input observations for the perception network.
        :param action_space: The action space that defines the network action heads.
        :param shared_embedding_keys: The list of embedding keys for this substep's model.
        :return: A policy network (actor) InferenceBlock, as well as the embedding net InferenceBlock if shared keys
            have been specified.
        """

        # build perception net
        embedding_net = self.template_perception_net(observation_space)

        # build action head
        perception_dict = embedding_net.perception_dict
        action_heads = []
        for action_head in action_space.spaces.keys():
            # initialize action head
            action_net = LinearOutputBlock(
                in_keys="latent",
                out_keys=action_head,
                in_shapes=perception_dict["latent"].out_shapes(),
                output_units=self._distribution_mapper.required_logits_shape(
                    action_head)[0])

            module_init = make_module_init_normc(std=0.01)
            action_net.apply(module_init)

            # extend perception dictionary
            perception_dict[action_head] = action_net
            action_heads.append(action_head)

        # compile inference model
        shared_embedding_keys_remove_input = list(
            filter(lambda x: x not in embedding_net.in_keys,
                   shared_embedding_keys))
        net = InferenceBlock(in_keys=embedding_net.in_keys,
                             out_keys=action_heads +
                             shared_embedding_keys_remove_input,
                             in_shapes=embedding_net.in_shapes,
                             perception_blocks=perception_dict)

        if len(shared_embedding_keys_remove_input) == 0:
            embedding_net = None

        return net, embedding_net
Example #22
def test_inference_block():
    """ perception test """
    in_dict, perception_dict = build_perception_dict()

    # compile inference block and predict everything at once
    net = InferenceBlock(in_keys=["in_key_0", "in_key_1"],
                         out_keys="concat",
                         in_shapes=[(1, 16), (1, 8)],
                         perception_blocks=perception_dict)
    out_dict = net(in_dict)
    assert out_dict["concat"].ndim == 3
    assert out_dict["concat"].shape[-1] == 64
    assert net.out_shapes() == [out_dict["concat"].shape[1:]]

    try:
        import pygraphviz

        # draw inference graph
        graph = InferenceGraph(inference_block=net)
        graph.show(name='my_test_net', block_execution=False)
        graph.save(name='my_test_net', save_path='.')
        assert len(glob.glob('*my_test_net*')) == 2
    except ImportError:
        pass
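`build_perception_dict` is defined elsewhere in the test module. One plausible implementation that satisfies the assertions above (two dense embeddings of 32 units each, concatenated into 64) could look like this, assuming `DenseBlock` and `ConcatenationBlock` are in scope:

import torch
from collections import OrderedDict

def build_perception_dict():
    """Hypothetical helper: two dense embeddings concatenated to 64 units."""
    perception_dict = OrderedDict()
    perception_dict['feat_0'] = DenseBlock(
        in_keys='in_key_0', out_keys='feat_0', in_shapes=[(1, 16)],
        hidden_units=[32], non_lin=torch.nn.ReLU)
    perception_dict['feat_1'] = DenseBlock(
        in_keys='in_key_1', out_keys='feat_1', in_shapes=[(1, 8)],
        hidden_units=[32], non_lin=torch.nn.ReLU)
    perception_dict['concat'] = ConcatenationBlock(
        in_keys=['feat_0', 'feat_1'], out_keys='concat',
        in_shapes=perception_dict['feat_0'].out_shapes() +
        perception_dict['feat_1'].out_shapes(),
        concat_dim=-1)

    # batch of 4, matching the in_shapes (1, 16) and (1, 8) declared in the test
    in_dict = {'in_key_0': torch.randn(4, 1, 16),
               'in_key_1': torch.randn(4, 1, 8)}
    return in_dict, perception_dict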
Example #23
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 hidden_units: List[int], non_lin: nn.Module):
        super().__init__(obs_shapes, hidden_units, non_lin)

        # build action head
        self.perception_dict["value"] = LinearOutputBlock(
            in_keys="latent",
            out_keys="value",
            in_shapes=self.perception_dict["latent"].out_shapes(),
            output_units=1)

        module_init = make_module_init_normc(std=0.01)
        self.perception_dict["value"].apply(module_init)

        # compile inference model
        self.net = InferenceBlock(in_keys=list(obs_shapes.keys()),
                                  out_keys="value",
                                  in_shapes=list(obs_shapes.values()),
                                  perception_blocks=self.perception_dict)
Example #24
    def __init__(self, obs_shapes, non_lin=nn.Tanh):
        super().__init__()

        # build perception part
        self.perception_network = DenseBlock(in_keys="observation", out_keys="latent",
                                             in_shapes=obs_shapes['observation'],
                                             hidden_units=[32, 32], non_lin=non_lin)

        module_init = make_module_init_normc(std=1.0)
        self.perception_network.apply(module_init)

        # build action head
        self.value_head = LinearOutputBlock(in_keys="latent", out_keys="value",
                                            in_shapes=self.perception_network.out_shapes(),
                                            output_units=1)

        module_init = make_module_init_normc(std=0.01)
        self.value_head.apply(module_init)

        # compile inference model
        self.net = InferenceBlock(in_keys="observation", out_keys="value", in_shapes=list(obs_shapes.values()),
                                  perception_blocks={"latent": self.perception_network,
                                                     "value": self.value_head})
Example #25
class CartPoleStateValueNet(nn.Module):
    """The Value net (critic) computing the discounted cumulative future reward from the observations.

    :param obs_shapes: The shapes of all observations as a dict.
    :param non_lin: The nonlinear activation to be used.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)]):
        nn.Module.__init__(self)

        # initialize the perception dictionary
        self.perception_dict = OrderedDict()

        # concatenate all observations in dictionary
        self.perception_dict['concat'] = ConcatenationBlock(
            in_keys=[
                'cart_position', 'cart_velocity', 'pole_angle',
                'pole_angular_velocity'
            ],
            out_keys='concat',
            in_shapes=[
                obs_shapes['cart_position'], obs_shapes['cart_velocity'],
                obs_shapes['pole_angle'], obs_shapes['pole_angular_velocity']
            ],
            concat_dim=-1)

        # process concatenated representation with two dense layers
        self.perception_dict['embedding'] = DenseBlock(
            in_keys='concat',
            in_shapes=self.perception_dict['concat'].out_shapes(),
            hidden_units=[128, 128],
            non_lin=non_lin,
            out_keys='embedding')

        # add a linear output block
        self.perception_dict['value'] = LinearOutputBlock(
            in_keys='embedding',
            out_keys='value',
            in_shapes=self.perception_dict['embedding'].out_shapes(),
            output_units=1)

        # compile an inference block
        self.perception_net = InferenceBlock(
            in_keys=[
                'cart_position', 'cart_velocity', 'pole_angle',
                'pole_angular_velocity'
            ],
            out_keys='value',
            in_shapes=[
                obs_shapes[key] for key in [
                    'cart_position', 'cart_velocity', 'pole_angle',
                    'pole_angular_velocity'
                ]
            ],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['value'].apply(make_module_init_normc(0.01))

    def forward(
            self, tensor_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param tensor_dict: The input tensor dictionary.
        :return: The computed output of the network.
        """
        return self.perception_net(tensor_dict)
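A hedged usage sketch for `CartPoleStateValueNet`; treating each CartPole feature as a one-element vector is an assumption:

import torch
import torch.nn as nn

keys = ['cart_position', 'cart_velocity', 'pole_angle', 'pole_angular_velocity']
obs_shapes = {key: (1,) for key in keys}   # assumed per-feature shapes
value_net = CartPoleStateValueNet(obs_shapes, non_lin=nn.Tanh)

obs = {key: torch.randn(8, 1) for key in keys}
value = value_net(obs)['value']   # concat -> (8, 4), dense [128, 128] -> (8, 1)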
Example #26
    def template_q_value_net(
            self,
            observation_space: Optional[spaces.Dict],
            action_space: spaces.Dict,
            only_discrete_spaces: bool,
            perception_net: Optional[InferenceBlock] = None) -> InferenceBlock:
        """Compiles a template state action (Q) value network.

        :param observation_space: The input observations for the perception network.
        :param action_space: The action space that defines the network action heads.
        :param only_discrete_spaces: A dict specifying if the action spaces w.r.t. the step only hold discrete action
                                     spaces.
        :param perception_net: An initial network to continue from
                               (e.g. useful for shared weights; model building continues from the key 'latent').
        :return: A q value network (critic) InferenceBlock.
        """
        assert all(map(lambda space: isinstance(space, (spaces.Discrete, spaces.Box)),
                       action_space.spaces.values())), 'Only discrete and box spaces supported thus far for q values ' \
                                                       'critic.'

        if not only_discrete_spaces:
            discrete_space = list(
                filter(
                    lambda kk: isinstance(action_space.spaces[kk], spaces.
                                          Discrete), action_space.spaces))
            if len(discrete_space) > 0:
                new_action_space = {}
                for key in action_space.spaces.keys():
                    if key in discrete_space:
                        new_action_space[key] = OneHotPreProcessor(
                            action_space.spaces[key]).processed_space()
                    else:
                        new_action_space[key] = action_space.spaces[key]
                action_space = spaces.Dict(new_action_space)
            observation_space = spaces.Dict({
                **observation_space.spaces,
                **action_space.spaces
            })
            value_heads = {'q_value': 1}
        else:
            value_heads = {
                f'{act_key}_q_values': act_space.n
                for act_key, act_space in action_space.spaces.items()
            }

        # check if actions are considered as observations for the state-action critic
        for action_head in action_space.spaces.keys():
            if action_head not in self.model_builder.observation_modality_mapping:
                BColors.print_colored(
                    f'TemplateModelComposer: The action \'{action_head}\' could not be found in the '
                    f"model_builder.observation_modality_mapping and won't be considered "
                    f'as an input to the state-action critic!', BColors.FAIL)

        # build perception net
        if perception_net is None:
            perception_net = self.template_perception_net(observation_space)

        perception_dict = perception_net.perception_dict
        for value_head, output_units in value_heads.items():
            # initialize action head
            value_net = LinearOutputBlock(
                in_keys="latent",
                out_keys=value_head,
                in_shapes=perception_dict["latent"].out_shapes(),
                output_units=output_units)

            module_init = make_module_init_normc(std=0.01)
            value_net.apply(module_init)

            # extend perception dictionary
            perception_dict[value_head] = value_net

        # compile inference model
        net = InferenceBlock(in_keys=perception_net.in_keys,
                             out_keys=list(value_heads.keys()),
                             in_shapes=perception_net.in_shapes,
                             perception_blocks=perception_dict)

        return net
Example #27
class CuttingPolicyNet(nn.Module):
    """The Policy net (actor) computing the action probabilities from the observations.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32

        self.perception_dict = OrderedDict()

        self.perception_dict['selected_feat'] = DenseBlock(
            in_keys='selected_piece',
            out_keys='selected_feat',
            in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='ordered_piece',
            out_keys='order_feat',
            in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['latent'] = ConcatenationBlock(
            in_keys=['selected_feat', 'order_feat'],
            out_keys='latent',
            in_shapes=[[hidden_units], [hidden_units]],
            concat_dim=-1)

        rotation_out_key = 'cut_rotation_logits' if with_mask else 'cut_rotation'
        self.perception_dict[rotation_out_key] = LinearOutputBlock(
            in_keys='latent',
            out_keys=rotation_out_key,
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_rotation'][0])

        if with_mask:
            self.perception_dict['cut_rotation'] = ActionMaskingBlock(
                in_keys=['cut_rotation_logits', 'cutting_mask'],
                out_keys='cut_rotation',
                in_shapes=self.perception_dict['cut_rotation_logits'].
                out_shapes() + [self.obs_shapes['cutting_mask']],
                num_actors=1,
                num_of_actor_actions=None)

        self.perception_dict['cut_order'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='cut_order',
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_order'][0])

        in_keys = ['selected_piece', 'ordered_piece']
        if with_mask:
            in_keys.append('cutting_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys=['cut_rotation', 'cut_order'],
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict[rotation_out_key].apply(
            make_module_init_normc(0.01))
        self.perception_dict['cut_order'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
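A hedged usage sketch for `CuttingPolicyNet` with masking enabled; the piece shapes and the number of rotation/order actions are illustrative assumptions:

import torch
import torch.nn as nn

obs_shapes = {'selected_piece': (2,), 'ordered_piece': (2,), 'cutting_mask': (3,)}
action_logits_shapes = {'cut_rotation': (3,), 'cut_order': (2,)}
policy = CuttingPolicyNet(obs_shapes, action_logits_shapes,
                          non_lin=nn.Tanh, with_mask=True)

obs = {'selected_piece': torch.randn(8, 2),
       'ordered_piece': torch.randn(8, 2),
       'cutting_mask': torch.ones(8, 3)}   # all rotations allowed
out = policy(obs)
# out['cut_rotation']: masked logits of shape (8, 3); out['cut_order']: (8, 2)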
Example #28
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units, embedding_dim = 32, 7

        self.perception_dict = OrderedDict()

        # embed inventory
        # ---------------
        self.perception_dict['inventory_feat'] = DenseBlock(
            in_keys='inventory',
            out_keys='inventory_feat',
            in_shapes=self.obs_shapes['inventory'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['inventory_embed'] = LinearOutputBlock(
            in_keys='inventory_feat',
            out_keys='inventory_embed',
            in_shapes=self.perception_dict['inventory_feat'].out_shapes(),
            output_units=embedding_dim)

        # embed ordered_piece
        # -------------------
        self.perception_dict['order_unsqueezed'] = FunctionalBlock(
            in_keys='ordered_piece',
            out_keys='order_unsqueezed',
            in_shapes=self.obs_shapes['ordered_piece'],
            func=lambda x: torch.unsqueeze(x, dim=-2))

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='order_unsqueezed',
            out_keys='order_feat',
            in_shapes=self.perception_dict['order_unsqueezed'].out_shapes(),
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['order_embed'] = LinearOutputBlock(
            in_keys='order_feat',
            out_keys='order_embed',
            in_shapes=self.perception_dict['order_feat'].out_shapes(),
            output_units=embedding_dim)

        # compute dot product score
        # -------------------------
        in_shapes = self.perception_dict['inventory_embed'].out_shapes()
        in_shapes += self.perception_dict['order_embed'].out_shapes()
        out_key = 'corr_score' if with_mask else 'piece_idx'
        self.perception_dict[out_key] = CorrelationBlock(
            in_keys=['inventory_embed', 'order_embed'],
            out_keys=out_key,
            in_shapes=in_shapes,
            reduce=True)

        # apply action masking
        if with_mask:
            self.perception_dict['piece_idx'] = ActionMaskingBlock(
                in_keys=['corr_score', 'inventory_mask'],
                out_keys='piece_idx',
                in_shapes=self.perception_dict['corr_score'].out_shapes() +
                [self.obs_shapes['inventory_mask']],
                num_actors=1,
                num_of_actor_actions=None)

        assert self.perception_dict['piece_idx'].out_shapes()[0][0] == \
            action_logits_shapes['piece_idx'][0]

        in_keys = ['ordered_piece', 'inventory']
        if with_mask:
            in_keys.append('inventory_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='piece_idx',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['inventory_embed'].apply(
            make_module_init_normc(0.01))
        self.perception_dict['order_embed'].apply(make_module_init_normc(0.01))
Example #29
class SelectionPolicyNet(nn.Module):
    """Selection Policy Network for cutting 2d.

    :param obs_shapes: The shapes of all observations as a dict.
    :param action_logits_shapes: The shapes of all actions as a dict structure.
    :param non_lin: The nonlinear activation to be used.
    :param with_mask: Whether to use action masking.
    """
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units, embedding_dim = 32, 7

        self.perception_dict = OrderedDict()

        # embed inventory
        # ---------------
        self.perception_dict['inventory_feat'] = DenseBlock(
            in_keys='inventory',
            out_keys='inventory_feat',
            in_shapes=self.obs_shapes['inventory'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['inventory_embed'] = LinearOutputBlock(
            in_keys='inventory_feat',
            out_keys='inventory_embed',
            in_shapes=self.perception_dict['inventory_feat'].out_shapes(),
            output_units=embedding_dim)

        # embed ordered_piece
        # -------------------
        self.perception_dict['order_unsqueezed'] = FunctionalBlock(
            in_keys='ordered_piece',
            out_keys='order_unsqueezed',
            in_shapes=self.obs_shapes['ordered_piece'],
            func=lambda x: torch.unsqueeze(x, dim=-2))

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='order_unsqueezed',
            out_keys='order_feat',
            in_shapes=self.perception_dict['order_unsqueezed'].out_shapes(),
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['order_embed'] = LinearOutputBlock(
            in_keys='order_feat',
            out_keys='order_embed',
            in_shapes=self.perception_dict['order_feat'].out_shapes(),
            output_units=embedding_dim)

        # compute dot product score
        # -------------------------
        in_shapes = self.perception_dict['inventory_embed'].out_shapes()
        in_shapes += self.perception_dict['order_embed'].out_shapes()
        out_key = 'corr_score' if with_mask else 'piece_idx'
        self.perception_dict[out_key] = CorrelationBlock(
            in_keys=['inventory_embed', 'order_embed'],
            out_keys=out_key,
            in_shapes=in_shapes,
            reduce=True)

        # apply action masking
        if with_mask:
            self.perception_dict['piece_idx'] = ActionMaskingBlock(
                in_keys=['corr_score', 'inventory_mask'],
                out_keys='piece_idx',
                in_shapes=self.perception_dict['corr_score'].out_shapes() +
                [self.obs_shapes['inventory_mask']],
                num_actors=1,
                num_of_actor_actions=None)

        assert self.perception_dict['piece_idx'].out_shapes()[0][0] == \
            action_logits_shapes['piece_idx'][0]

        in_keys = ['ordered_piece', 'inventory']
        if with_mask:
            in_keys.append('inventory_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys='piece_idx',
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict['inventory_embed'].apply(
            make_module_init_normc(0.01))
        self.perception_dict['order_embed'].apply(make_module_init_normc(0.01))

    def forward(self, xx: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Compute forward pass through the network.

        :param xx: Input dict.
        :return: The computed output of the network.
        """
        return self.perception_net(xx)
Example #30
    def __init__(self, obs_shapes: Dict[str, Sequence[int]],
                 action_logits_shapes: Dict[str, Sequence[int]],
                 non_lin: Union[str, type(nn.Module)], with_mask: bool):
        nn.Module.__init__(self)
        self.obs_shapes = obs_shapes

        hidden_units = 32

        self.perception_dict = OrderedDict()

        self.perception_dict['selected_feat'] = DenseBlock(
            in_keys='selected_piece',
            out_keys='selected_feat',
            in_shapes=self.obs_shapes['selected_piece'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['order_feat'] = DenseBlock(
            in_keys='ordered_piece',
            out_keys='order_feat',
            in_shapes=self.obs_shapes['ordered_piece'],
            hidden_units=[hidden_units],
            non_lin=non_lin)

        self.perception_dict['latent'] = ConcatenationBlock(
            in_keys=['selected_feat', 'order_feat'],
            out_keys='latent',
            in_shapes=[[hidden_units], [hidden_units]],
            concat_dim=-1)

        rotation_out_key = 'cut_rotation_logits' if with_mask else 'cut_rotation'
        self.perception_dict[rotation_out_key] = LinearOutputBlock(
            in_keys='latent',
            out_keys=rotation_out_key,
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_rotation'][0])

        if with_mask:
            self.perception_dict['cut_rotation'] = ActionMaskingBlock(
                in_keys=['cut_rotation_logits', 'cutting_mask'],
                out_keys='cut_rotation',
                in_shapes=self.perception_dict['cut_rotation_logits'].
                out_shapes() + [self.obs_shapes['cutting_mask']],
                num_actors=1,
                num_of_actor_actions=None)

        self.perception_dict['cut_order'] = LinearOutputBlock(
            in_keys='latent',
            out_keys='cut_order',
            in_shapes=self.perception_dict['latent'].out_shapes(),
            output_units=action_logits_shapes['cut_order'][0])

        in_keys = ['selected_piece', 'ordered_piece']
        if with_mask:
            in_keys.append('cutting_mask')
        self.perception_net = InferenceBlock(
            in_keys=in_keys,
            out_keys=['cut_rotation', 'cut_order'],
            in_shapes=[self.obs_shapes[key] for key in in_keys],
            perception_blocks=self.perception_dict)

        # initialize model weights
        self.perception_net.apply(make_module_init_normc(1.0))
        self.perception_dict[rotation_out_key].apply(
            make_module_init_normc(0.01))
        self.perception_dict['cut_order'].apply(make_module_init_normc(0.01))