def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        n_critics: int = 2,
        film_critic: bool = False,
        num_env_params: int = 0,
        share_features_extractor: bool = True,
    ):
        """Critic (Q-value) network ensemble.

        Builds ``n_critics`` independent MLP heads, each mapping the
        concatenation of extracted features and the action to a scalar
        Q-value.

        :param net_arch: hidden layer sizes for each Q-network MLP
        :param features_extractor: feature extractor module (registered by the base class)
        :param features_dim: dimension of the extracted features
        :param n_critics: number of independent Q-networks
        :param film_critic: FiLM-conditioning flag, stored as ``self.film``
        :param num_env_params: number of environment parameters for conditioning
        :param share_features_extractor: whether the extractor is shared with the actor
        """
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        action_dim = get_action_dim(self.action_space)

        self.share_features_extractor = share_features_extractor
        self.n_critics = n_critics
        self.q_networks = []
        self.film = film_critic
        # Fix: num_env_params was accepted but never stored, unlike the other
        # constructor arguments that are saved for object re-creation.
        self.num_env_params = num_env_params
        for idx in range(n_critics):
            q_net = create_mlp(features_dim + action_dim, 1, net_arch,
                               activation_fn)
            q_net = nn.Sequential(*q_net)
            # Register explicitly so PyTorch tracks the module even though it
            # is kept in a plain Python list.
            self.add_module(f"qf{idx}", q_net)
            self.q_networks.append(q_net)
# Example #2
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        features_extractor: nn.Module,
        features_dim: int,
        net_arch: Optional[List[int]] = None,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
    ):
        """Q-network for discrete actions: maps features to one Q-value per action."""
        super(QNetwork, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        # Default architecture: two hidden layers of 64 units each.
        self.net_arch = [64, 64] if net_arch is None else net_arch
        self.activation_fn = activation_fn
        self.features_extractor = features_extractor
        self.features_dim = features_dim
        self.normalize_images = normalize_images
        n_actions = self.action_space.n  # number of actions
        layers = create_mlp(self.features_dim, n_actions, self.net_arch, self.activation_fn)
        self.q_net = nn.Sequential(*layers)
# Example #3
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        device: Union[th.device, str] = "auto",
    ):
        """Deterministic actor: maps extracted features to a tanh-squashed action."""
        super(Actor, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            device=device,
            squash_output=True,
        )

        # Save constructor arguments so the object can be re-created on load.
        self.features_extractor = features_extractor
        self.normalize_images = normalize_images
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn

        n_actions = get_action_dim(self.action_space)
        # squash_output=True keeps the deterministic action in [-1, 1].
        mu_layers = create_mlp(features_dim, n_actions, net_arch, activation_fn, squash_output=True)
        self.mu = nn.Sequential(*mu_layers)
# Example #4
    def __init__(self, observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 use_sde: bool = False,
                 log_std_init: float = -3,
                 full_std: bool = True,
                 sde_net_arch: Optional[List[int]] = None,
                 use_expln: bool = False,
                 clip_mean: float = 2.0,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        """Stochastic actor (SAC-style).

        Builds a latent policy MLP, then either a state-dependent-noise
        (gSDE) action distribution or a squashed diagonal Gaussian with
        linear mean / log-std heads.

        :param net_arch: hidden layer sizes of the latent policy network
        :param use_sde: use generalized State-Dependent Exploration
        :param log_std_init: initial value for the log standard deviation
        :param full_std: gSDE: learn a full std matrix rather than one value per feature
        :param sde_net_arch: optional separate network architecture for gSDE features
        :param use_expln: gSDE: use ``expln()`` instead of ``exp()`` for the std
        :param clip_mean: clip the Gaussian mean to ``[-clip_mean, clip_mean]``
            (disabled when <= 0)
        """
        super(Actor, self).__init__(observation_space, action_space,
                                    features_extractor=features_extractor,
                                    normalize_images=normalize_images,
                                    device=device,
                                    squash_output=True)

        # Save arguments to re-create object at loading
        self.use_sde = use_sde
        self.sde_features_extractor = None
        # Fix: sde_net_arch was redundantly assigned twice in the original.
        self.sde_net_arch = sde_net_arch
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn
        self.log_std_init = log_std_init
        self.use_expln = use_expln
        self.full_std = full_std
        self.clip_mean = clip_mean

        action_dim = get_action_dim(self.action_space)
        # output_dim=-1: create_mlp builds only the hidden layers (no head).
        latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
        self.latent_pi = nn.Sequential(*latent_pi_net)
        last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

        if self.use_sde:
            latent_sde_dim = last_layer_dim
            # Separate feature extractor for gSDE
            if sde_net_arch is not None:
                self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(features_dim, sde_net_arch,
                                                                                            activation_fn)

            self.action_dist = StateDependentNoiseDistribution(action_dim, full_std=full_std, use_expln=use_expln,
                                                               learn_features=True, squash_output=True)
            self.mu, self.log_std = self.action_dist.proba_distribution_net(latent_dim=last_layer_dim,
                                                                            latent_sde_dim=latent_sde_dim,
                                                                            log_std_init=log_std_init)
            # Avoid numerical issues by limiting the mean of the Gaussian
            # to be in [-clip_mean, clip_mean]
            if clip_mean > 0.0:
                self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean, max_val=clip_mean))
        else:
            self.action_dist = SquashedDiagGaussianDistribution(action_dim)
            self.mu = nn.Linear(last_layer_dim, action_dim)
            self.log_std = nn.Linear(last_layer_dim, action_dim)
# Example #5
    def __init__(self, observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        """Twin-Q critic: two independent MLPs over (features, action)."""
        super(Critic, self).__init__(observation_space, action_space,
                                     features_extractor=features_extractor,
                                     normalize_images=normalize_images,
                                     device=device)

        n_actions = get_action_dim(self.action_space)
        input_dim = features_dim + n_actions

        # Two independent Q-networks (clipped double-Q trick).
        self.q1_net = nn.Sequential(*create_mlp(input_dim, 1, net_arch, activation_fn))
        self.q2_net = nn.Sequential(*create_mlp(input_dim, 1, net_arch, activation_fn))

        self.q_networks = [self.q1_net, self.q2_net]
# Example #6
    def __init__(self,
                 observation_space: gym.Space,
                 obs_unwrapper_function: Callable,
                 obs_space_dict: Dict[str, gym.Space],
                 normalize_images: bool,
                 features_dim: int = 96):
        """Feature extractor combining a CNN over the 'pov' image with small
        MLPs over the camera angle and dirt-inventory observations."""
        super().__init__(observation_space, obs_unwrapper_function,
                         obs_space_dict, normalize_images, features_dim)

        # CNN over the pixel observation, producing 80 features.
        self.cnn_extractor = NatureCNN(obs_space_dict['pov'], features_dim=80)

        # Small MLP (one hidden layer of 10 units) over the camera angle.
        self.camera_angle_dim = get_first_dim_from_shape(
            obs_space_dict['cameraAngle'].shape)
        self.camera_angle_extractor = nn.Sequential(
            *create_mlp(input_dim=self.camera_angle_dim,
                        output_dim=8,
                        net_arch=[10]))

        # Small MLP (one hidden layer of 10 units) over the dirt inventory.
        self.dirt_inventory_dim = get_first_dim_from_shape(
            obs_space_dict['inventory']['dirt'].shape)
        self.dirt_inventory_extractor = nn.Sequential(
            *create_mlp(input_dim=self.dirt_inventory_dim,
                        output_dim=8,
                        net_arch=[10]))
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        features_extractor: nn.Module,
        features_dim: int,
        latent_dim: int,
        net_arch: Optional[List[int]] = None,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
    ):
        """Variational autoencoder over extracted features.

        Encoder: features -> [64] -> 64 -> (mu, log-sigma) of size ``latent_dim``.
        Decoder: latent -> [64, 64] -> features (squashed output).

        :param latent_dim: size of the latent code
        :param net_arch: accepted for interface compatibility but currently
            ignored — the encoder architecture is fixed to [64] (see below)
        """
        super(VAE, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        # Fix: the original if/else assigned [64] on BOTH branches, silently
        # discarding any caller-supplied net_arch.  Behavior is preserved by
        # hard-coding [64]; the dead branch is removed.
        # NOTE(review): if callers were meant to control the encoder width,
        # this should be `net_arch = net_arch if net_arch is not None else [64]`
        # instead — confirm intent.
        net_arch = [64]

        self.net_arch = net_arch
        self.activation_fn = activation_fn
        self.features_extractor = features_extractor
        self.features_dim = features_dim
        self.latent_dim = latent_dim
        self.normalize_images = normalize_images
        encoder = create_mlp(self.features_dim, 64, self.net_arch,
                             self.activation_fn)
        self.encoder = nn.Sequential(*encoder)
        self.fc_mu = nn.Linear(64, self.latent_dim)
        self.fc_logsigma = nn.Linear(64, self.latent_dim)
        # NOTE: self.net_arch is re-purposed here for the decoder, so the
        # stored value no longer reflects the encoder architecture.
        self.net_arch = [64, 64]
        decoder = create_mlp(self.latent_dim, self.features_dim, self.net_arch,
                             self.activation_fn, True)
        self.decoder = nn.Sequential(*decoder)
# Example #8
def create_sde_features_extractor(
    features_dim: int, sde_net_arch: List[int], activation_fn: Type[nn.Module]
) -> Tuple[nn.Sequential, int]:
    """
    Create the neural network that will be used to extract features
    for the gSDE exploration function.

    :param features_dim: dimension of the input features
    :param sde_net_arch: hidden layer sizes; an empty list means the raw
        features are used unchanged
    :param activation_fn: activation function between hidden layers
    :return: the feature-extractor module and its output dimension
    """
    if sde_net_arch:
        chosen_activation = activation_fn
        output_dim = sde_net_arch[-1]
    else:
        # Special case: states are used directly as features, so no hidden
        # layers and no activation function.
        chosen_activation = None
        output_dim = features_dim
    layers = create_mlp(features_dim, -1, sde_net_arch,
                        activation_fn=chosen_activation, squash_output=False)
    return nn.Sequential(*layers), output_dim
# Example #9
    def __init__(self,
                 observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 use_sde: bool = False,
                 log_std_init: float = -3,
                 full_std: bool = True,
                 sde_net_arch: Optional[List[int]] = None,
                 use_expln: bool = False,
                 clip_mean: float = 2.0,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        """Gaussian actor: latent policy MLP with linear mean and log-std heads.

        NOTE(review): the gSDE-related arguments (use_sde, full_std,
        sde_net_arch, use_expln, clip_mean) are stored but never used by this
        constructor — presumably consumed elsewhere or kept only for
        checkpoint compatibility; confirm before removing.
        """
        super(Actor, self).__init__(observation_space,
                                    action_space,
                                    features_extractor=features_extractor,
                                    normalize_images=normalize_images,
                                    device=device,
                                    squash_output=True)

        # Save arguments to re-create object at loading
        self.use_sde = use_sde
        self.sde_features_extractor = None
        # Fix: sde_net_arch was redundantly assigned twice in the original.
        self.sde_net_arch = sde_net_arch
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn
        self.log_std_init = log_std_init
        self.use_expln = use_expln
        self.full_std = full_std
        self.clip_mean = clip_mean

        action_dim = get_action_dim(self.action_space)
        # output_dim=-1: create_mlp builds only the hidden layers (no head).
        latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
        self.latent_pi = nn.Sequential(*latent_pi_net)
        last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
# Example #10
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        share_features_extractor: bool = True,
        action_dist_num=32,
    ):
        """Quantile-style critic with a cosine embedding branch.

        The Q-network MLP is split into a head (first layer + activation) and
        a tail; a cosine embedding of quantile fractions is mixed in between.

        :param net_arch: hidden layer sizes; ``net_arch[0]`` also sizes the
            cosine-embedding projection
        :param action_dist_num: number of sampled quantile fractions
        """
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )
        self.share_features_extractor = share_features_extractor
        # Number of cosine basis functions for the quantile embedding.
        self.n_cos = 64
        self.action_dist_num = action_dist_num
        self.first_hidden_size = net_arch[0]

        # Cosine frequencies pi, 2*pi, ..., n_cos*pi, shaped (1, 1, n_cos).
        # NOTE(review): stored as a plain attribute, so it will not follow
        # the module across later .to(device) calls nor appear in state_dict;
        # consider register_buffer (would change state_dict keys — confirm).
        self.pis = th.FloatTensor([
            np.pi * i for i in range(1, self.n_cos + 1)
        ]).view(1, 1, self.n_cos).to(self.device)

        action_dim = get_action_dim(self.action_space)

        # Split the MLP: first layer (+activation) forms the head, the rest
        # is applied after mixing in the cosine embedding.
        net = create_mlp(features_dim + action_dim, 1, net_arch, activation_fn)
        self.net_head = nn.Sequential(*net[0:2])
        self.net_cos_embedding = nn.Sequential(
            nn.Linear(self.n_cos, self.first_hidden_size), activation_fn())
        self.net_out = nn.Sequential(*net[2:])
        # Fix: removed three redundant add_module() calls — assigning an
        # nn.Module to an attribute already registers it under that name.
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        mask_policy: int = 0,
        normalize_images: bool = True,
    ):
        """Deterministic actor whose last ``mask_policy`` feature dimensions
        are masked out via ``self.policy_mask``."""
        super(Actor, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            squash_output=True,
        )

        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn

        n_actions = get_action_dim(self.action_space)
        # Deterministic, tanh-squashed action head.
        self.mu = nn.Sequential(*create_mlp(features_dim,
                                            n_actions,
                                            net_arch,
                                            activation_fn,
                                            squash_output=True))

        # Disable the trailing `mask_policy` feature dimensions.
        mask = [True] * features_dim
        for offset in range(1, mask_policy + 1):
            mask[-offset] = False
        self.policy_mask = th.Tensor(mask)
# Example #12
    def __init__(self,
                 observation_space: gym.Space,
                 obs_unwrapper_function: Callable,
                 obs_space_dict: Dict[str, gym.Space],
                 normalize_images: bool,
                 features_dim: int = 20,
                 cnn_extractor_class: BaseFeaturesExtractor = NatureCNN,
                 cnn_feature_dim: int = 12,
                 mlp_net_arch: Iterable = (4, ),
                 mlp_feature_dim: int = 6,
                 embedding_dim: int = 6):
        """Composite feature extractor over a (possibly nested) dict space.

        Sub-spaces are routed by an inferred mapping to one of three
        extractor families: a CNN per image space, one shared MLP over all
        Box spaces, and an embedding table per Discrete space.  All extractor
        outputs are concatenated and linearly projected to ``features_dim``.

        :param obs_unwrapper_function: callable forwarded to the base class
            (semantics defined there — presumably unpacks raw observations)
        :param obs_space_dict: possibly nested mapping of names to gym spaces
        :param cnn_extractor_class: extractor class applied to each image space
        :param cnn_feature_dim: output dimension of each CNN extractor
        :param mlp_net_arch: hidden layer sizes of the shared MLP extractor
        :param mlp_feature_dim: output dimension of the shared MLP extractor
        :param embedding_dim: embedding size for each Discrete space
        """
        super().__init__(observation_space, obs_unwrapper_function,
                         obs_space_dict, normalize_images, features_dim)

        # This gets the string obs spaces associated with each extractor
        # They're stored in a nested _ separated string
        self.split_chars = "__"
        self.inferred_extractor_mapping = self.recursive_space_infer(
            obs_space_dict)
        self.cnn_spaces = self.inferred_extractor_mapping['CNN']
        self.mlp_spaces = self.inferred_extractor_mapping['MLP']
        self.embed_spaces = self.inferred_extractor_mapping['EMBED']

        _cnn_extractors = []
        # Running total width of the concatenated extractor outputs.
        total_flattened_dim = 0

        # Create CNN extractors
        # One CNN per image sub-space, each producing cnn_feature_dim features.
        for space_designation in self.cnn_spaces:
            cnn_space = recursive_lookup_from_string(obs_space_dict,
                                                     space_designation,
                                                     self.split_chars)
            assert is_image_space(cnn_space)
            _cnn_extractors.append(
                cnn_extractor_class(cnn_space, cnn_feature_dim))
            total_flattened_dim += cnn_feature_dim
        self.cnn_extractors = nn.ModuleList(_cnn_extractors)

        # Create MLP Extractor
        # All Box sub-spaces share ONE MLP over their flattened concatenation.
        total_mlp_dim = 0
        if len(self.mlp_spaces) > 0:
            for space_designation in self.mlp_spaces:
                mlp_space = recursive_lookup_from_string(
                    obs_space_dict, space_designation, self.split_chars)
                assert isinstance(mlp_space, gym.spaces.Box)
                # assume if the space is multi-dimensional, we'll flatten it
                # before sending it to a MLP
                n_dim = int(np.prod(mlp_space.shape))
                total_mlp_dim += n_dim
            self.mlp_extractor = nn.Sequential(
                *create_mlp(total_mlp_dim, mlp_feature_dim, mlp_net_arch))
            total_flattened_dim += mlp_feature_dim
        else:
            self.mlp_extractor = None

        # Create Embed tables
        # One embedding table per Discrete sub-space.
        if len(self.embed_spaces) > 0:
            _embedding_tables = []
            for space_designation in self.embed_spaces:
                embed_space = recursive_lookup_from_string(
                    obs_space_dict, space_designation, self.split_chars)
                assert isinstance(embed_space, gym.spaces.Discrete)
                space_n = embed_space.n
                _embedding_tables.append(
                    nn.Embedding(embedding_dim=embedding_dim,
                                 num_embeddings=space_n))
                total_flattened_dim += embedding_dim

            self.embedding_tables = nn.ModuleList(_embedding_tables)
        else:
            self.embedding_tables = None
        # Final linear projection from the concatenated features to features_dim.
        self.projection_layer = nn.Linear(total_flattened_dim, features_dim)