Example #1
    def __init__(self, multipolicy, idx):
        self.save_init_params(locals())
        super(WeightedMultiPolicySelector, self).__init__()
        ExplorationPolicy.__init__(self, multipolicy.action_dim)

        self._multipolicy = multipolicy
        self.idx = idx
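
The constructor above only stores the wrapped multi-policy and the index of the sub-policy it selects. Below is a minimal, self-contained sketch of that wrapper pattern; the class name, the get_action delegation, and the pol_idx keyword are illustrative assumptions, not the library's API.

class PolicySelector:
    """Minimal sketch: wrap a multi-policy and expose one sub-policy by index."""

    def __init__(self, multipolicy, idx):
        self._multipolicy = multipolicy
        self.idx = idx

    def get_action(self, obs):
        # Hypothetical delegation: request the action of sub-policy `idx`
        # from the wrapped multi-policy (method and keyword are assumptions).
        return self._multipolicy.get_action(obs, pol_idx=self.idx)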
Example #2
    def __init__(self,
                 obs_dim,
                 action_dim,
                 hidden_sizes,
                 std=None,
                 hidden_w_init='xavier_normal',
                 hidden_b_init_val=0.1,
                 output_w_init='xavier_normal',
                 output_b_init_val=0.1,
                 **kwargs):
        self.save_init_params(locals())
        super(GaussianPolicy,
              self).__init__(hidden_sizes,
                             input_size=obs_dim,
                             output_size=action_dim,
                             hidden_w_init=hidden_w_init,
                             hidden_b_init_val=hidden_b_init_val,
                             output_w_init=output_w_init,
                             output_b_init_val=output_b_init_val,
                             **kwargs)
        ExplorationPolicy.__init__(self, action_dim)

        self.log_std = None
        self.std = std
        if std is None:
            last_hidden_size = obs_dim
            if len(hidden_sizes) > 0:
                last_hidden_size = hidden_sizes[-1]
            self.last_fc_log_std = nn.Linear(last_hidden_size, action_dim)

            if output_w_init == 'xavier_normal':
                ptu.layer_init(layer=self.last_fc_log_std,
                               activation='linear',
                               b=output_b_init_val)
            elif output_w_init == 'xavier_normal_0.1':
                ptu.layer_init(layer=self.last_fc_log_std,
                               activation='0.1',
                               b=output_b_init_val)
            elif output_w_init == 'xavier_normal_0.01':
                ptu.layer_init(layer=self.last_fc_log_std,
                               activation='0.01',
                               b=output_b_init_val)
            elif output_w_init == 'xavier_normal_0.001':
                ptu.layer_init(layer=self.last_fc_log_std,
                               activation='0.001',
                               b=output_b_init_val)
            elif output_w_init == 'xavier_normal_0.003':
                ptu.layer_init(layer=self.last_fc_log_std,
                               activation='0.003',
                               b=output_b_init_val)
            else:
                raise ValueError("Wrong init value:%s" % output_w_init)

        else:
            self.log_std = torch.log(std)
            assert LOG_SIG_MIN <= self.log_std <= LOG_SIG_MAX
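
The structural point of this constructor is the optional log-std head: when std is None, a second linear layer (last_fc_log_std) predicts the log standard deviation from the last hidden features; otherwise a fixed log-std is stored and checked against the clamping bounds. A simplified, self-contained sketch of that pattern in plain PyTorch (not the library's class; the bound values are a common assumption):

import math
import torch
import torch.nn as nn

LOG_SIG_MIN, LOG_SIG_MAX = -20.0, 2.0  # common clamping bounds (assumption)

class SimpleGaussianHead(nn.Module):
    """Mean head plus optional learned log-std head, as in the example."""

    def __init__(self, in_features, action_dim, std=None):
        super().__init__()
        self.mean_layer = nn.Linear(in_features, action_dim)
        if std is None:
            # Learn the log-std from the same features as the mean.
            self.log_std_layer = nn.Linear(in_features, action_dim)
            self.log_std = None
        else:
            self.log_std_layer = None
            self.log_std = math.log(std)
            assert LOG_SIG_MIN <= self.log_std <= LOG_SIG_MAX

    def forward(self, h):
        mean = self.mean_layer(h)
        if self.log_std_layer is not None:
            log_std = torch.clamp(self.log_std_layer(h),
                                  LOG_SIG_MIN, LOG_SIG_MAX)
        else:
            log_std = torch.full_like(mean, self.log_std)
        return mean, log_std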
Example #3
    def __init__(self,
                 obs_dim,
                 action_dim,
                 hidden_sizes,
                 std=None,
                 hidden_w_init='xavier_normal',
                 hidden_b_init_val=0,
                 output_w_init='xavier_normal',
                 output_b_init_val=0,
                 **kwargs):
        """

        Args:
            obs_dim:
            action_dim:
            hidden_sizes:
            std:
            hidden_w_init:
            hidden_b_init_val:
            output_w_init:
            output_b_init_val:
            **kwargs:
        """
        self.save_init_params(locals())
        super(TanhGaussianPolicy,
              self).__init__(hidden_sizes,
                             input_size=obs_dim,
                             output_size=action_dim,
                             hidden_w_init=hidden_w_init,
                             hidden_b_init_val=hidden_b_init_val,
                             output_w_init=output_w_init,
                             output_b_init_val=output_b_init_val,
                             **kwargs)
        ExplorationPolicy.__init__(self, action_dim)

        self.log_std = None
        self.std = std
        if std is None:
            last_hidden_size = obs_dim
            if len(hidden_sizes) > 0:
                last_hidden_size = hidden_sizes[-1]
            self.last_fc_log_std = nn.Linear(last_hidden_size, action_dim)
            ptu.layer_init(layer=self.last_fc_log_std,
                           option=output_w_init,
                           activation='linear',
                           b=output_b_init_val)
        else:
            self.log_std = math.log(std)
            assert LOG_SIG_MIN <= self.log_std <= LOG_SIG_MAX

        self._normal_dist = Normal(loc=ptu.zeros(action_dim),
                                   scale=ptu.ones(action_dim))
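
The Normal(loc=zeros, scale=ones) kept at the end is the usual ingredient for reparameterized sampling, and the tanh squashing that names the class requires a change-of-variables correction to the log-probability. A generic, self-contained sketch of that tanh-Gaussian sampling step (a standard construction, not code taken from this class's forward):

import torch

def tanh_gaussian_sample(mean, log_std, epsilon=1e-6):
    """Reparameterized sample from a tanh-squashed Gaussian.

    Returns the squashed action and its log-probability, including the
    tanh change-of-variables correction.
    """
    std = log_std.exp()
    noise = torch.randn_like(mean)          # z ~ N(0, I)
    pre_tanh = mean + std * noise           # reparameterization trick
    action = torch.tanh(pre_tanh)

    normal = torch.distributions.Normal(mean, std)
    log_prob = normal.log_prob(pre_tanh)
    # log-det of the tanh Jacobian: log(1 - tanh(x)^2), with epsilon for stability
    log_prob = log_prob - torch.log(1.0 - action.pow(2) + epsilon)
    return action, log_prob.sum(dim=-1, keepdim=True)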
Example #4
    def __init__(self,
                 obs_dim,
                 action_dim,
                 n_policies,
                 shared_hidden_sizes,
                 unshared_hidden_sizes=None,
                 stds=None,
                 hidden_activation=F.relu,
                 output_activation=identity,
                 hidden_w_init=ptu.xavier_initOLD,
                 hidden_b_init_val=0,
                 output_w_init=ptu.xavier_initOLD,
                 output_b_init_val=0,
                 shared_layer_norm=False,
                 unshared_layer_norm=False,
                 layer_norm_kwargs=None,
                 **kwargs):
        self.save_init_params(locals())
        super(TanhGaussianMultiPolicy, self).__init__()
        ExplorationPolicy.__init__(self, action_dim)

        if layer_norm_kwargs is None:
            layer_norm_kwargs = dict()

        self.input_size = obs_dim
        self.output_sizes = action_dim
        self._n_policies = n_policies
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.shared_layer_norm = shared_layer_norm
        self.unshared_layer_norm = unshared_layer_norm
        self.fcs = []
        self.shared_layer_norms = []
        self.ufcs = [list() for _ in range(self._n_policies)]
        self.unshared_layer_norms = [list() for _ in range(self._n_policies)]
        self.last_fcs = []
        in_size = self.input_size

        # Shared Layers
        for i, next_size in enumerate(shared_hidden_sizes):
            fc = nn.Linear(in_size, next_size)
            in_size = next_size
            hidden_w_init(fc.weight)
            ptu.fill(fc.bias, hidden_b_init_val)
            self.__setattr__("fc{}".format(i), fc)
            self.fcs.append(fc)

            if self.shared_layer_norm:
                ln = LayerNorm(next_size)
                self.__setattr__("shared_layer_norm{}".format(i), ln)
                self.shared_layer_norms.append(ln)

        # Unshared Layers
        if unshared_hidden_sizes is not None:
            for i, next_size in enumerate(unshared_hidden_sizes):
                for pol_idx in range(self._n_policies):
                    ufc = nn.Linear(in_size, next_size)
                    hidden_w_init(ufc.weight)
                    ptu.fill(ufc.bias, hidden_b_init_val)
                    self.__setattr__("ufc{}_{}".format(pol_idx, i), ufc)
                    self.ufcs[pol_idx].append(ufc)

                    if self.unshared_layer_norm:
                        ln = LayerNorm(next_size)
                        tmp_txt = "unshared_layer_norm{}_{}".format(pol_idx, i)
                        self.__setattr__(tmp_txt, ln)
                        self.unshared_layer_norms[pol_idx].append(ln)
                in_size = next_size

        for pol_idx in range(self._n_policies):
            last_fc = nn.Linear(in_size, self._action_dim)
            output_w_init(last_fc.weight)
            ptu.fill(last_fc.bias, output_b_init_val)
            self.__setattr__("last_fc{}".format(pol_idx), last_fc)
            self.last_fcs.append(last_fc)

        self.stds = stds
        self.log_std = list()
        if stds is None:
            self.last_fc_log_stds = list()
            for pol_idx in range(self._n_policies):
                last_hidden_size = obs_dim
                if unshared_hidden_sizes is None:
                    if len(shared_hidden_sizes) > 0:
                        last_hidden_size = shared_hidden_sizes[-1]
                else:
                    last_hidden_size = unshared_hidden_sizes[-1]
                last_fc_log_std = nn.Linear(last_hidden_size, action_dim)
                hidden_w_init(last_fc_log_std.weight)
                ptu.fill(last_fc_log_std.bias, hidden_b_init_val)
                self.__setattr__("last_fc_log_std{}".format(pol_idx),
                                 last_fc_log_std)
                self.last_fc_log_stds.append(last_fc_log_std)

        else:
            for std in stds:
                self.log_std.append(np.log(std))
                assert LOG_SIG_MIN <= self.log_std[-1] <= LOG_SIG_MAX
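
This constructor builds a trunk of shared layers followed by per-policy (unshared) layers and one last linear layer per policy, registering every layer with __setattr__ so PyTorch tracks its parameters. A condensed sketch of the same shared-trunk/multi-head layout, using nn.ModuleList to get the registration without manual __setattr__ calls (a simplification, not the library's code):

import torch
import torch.nn as nn

class SharedTrunkMultiHead(nn.Module):
    """Shared hidden layers followed by one output head per policy."""

    def __init__(self, obs_dim, action_dim, n_policies,
                 shared_hidden_sizes, unshared_hidden_sizes):
        super().__init__()
        # Shared trunk
        self.shared = nn.ModuleList()
        in_size = obs_dim
        for size in shared_hidden_sizes:
            self.shared.append(nn.Linear(in_size, size))
            in_size = size
        # Unshared per-policy layers plus a final linear head each
        self.heads = nn.ModuleList()
        for _ in range(n_policies):
            layers, head_in = [], in_size
            for size in unshared_hidden_sizes:
                layers.append(nn.Linear(head_in, size))
                layers.append(nn.ReLU())
                head_in = size
            layers.append(nn.Linear(head_in, action_dim))
            self.heads.append(nn.Sequential(*layers))

    def forward(self, obs):
        h = obs
        for fc in self.shared:
            h = torch.relu(fc(h))
        # One output per sub-policy, stacked along a new dimension
        return torch.stack([head(h) for head in self.heads], dim=1)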
Example #5
    def __init__(self,
                 obs_dim,
                 action_dim,
                 n_policies,
                 latent_dim,
                 shared_hidden_sizes=None,
                 unshared_hidden_sizes=None,
                 unshared_mix_hidden_sizes=None,
                 unshared_policy_hidden_sizes=None,
                 stds=None,
                 hidden_activation='relu',
                 hidden_w_init='xavier_normal',
                 hidden_b_init_val=1e-2,
                 output_w_init='xavier_normal',
                 output_b_init_val=1e-2,
                 pol_output_activation='linear',
                 mix_output_activation='linear',
                 final_pol_output_activation='linear',
                 input_norm=False,
                 shared_layer_norm=False,
                 policies_layer_norm=False,
                 mixture_layer_norm=False,
                 final_policy_layer_norm=False,
                 epsilon=1e-6,
                 softmax_weights=False,
                 **kwargs):
        self.save_init_params(locals())
        super(TanhGaussianComposedMultiPolicy, self).__init__()
        ExplorationPolicy.__init__(self, action_dim)

        self._input_size = obs_dim
        self._output_sizes = action_dim
        self._n_subpolicies = n_policies
        self._latent_size = latent_dim
        # Activation Fcns
        self._hidden_activation = ptu.get_activation(hidden_activation)
        self._pol_output_activation = ptu.get_activation(pol_output_activation)
        self._mix_output_activation = ptu.get_activation(mix_output_activation)
        self._final_pol_output_activation = ptu.get_activation(
            final_pol_output_activation)
        # Normalization Layer Flags
        self._shared_layer_norm = shared_layer_norm
        self._policies_layer_norm = policies_layer_norm
        self._mixture_layer_norm = mixture_layer_norm
        self._final_policy_layer_norm = final_policy_layer_norm
        # Layers Lists
        self._sfcs = []  # Shared Layers
        self._sfc_norms = []  # Norm. Shared Layers
        self._pfcs = [list()
                      for _ in range(self._n_subpolicies)]  # Policies Layers
        self._pfc_norms = [list()
                           for _ in range(self._n_subpolicies)]  # N. Pol. L.
        self._pfc_lasts = []  # Last Policies Layers
        self._mfcs = []  # Mixing Layers
        self._norm_mfcs = []  # Norm. Mixing Layers
        # self.mfc_last = None  # Below is instantiated
        self._fpfcs = []  # Final Policy Layers
        self._norm_fpfcs = []  # Norm. Mixing Layers

        self._softmax_weights = softmax_weights

        # Initial size = Obs size
        in_size = self._input_size

        # Ordered Dictionaries for specific modules/parameters
        self._shared_modules = OrderedDict()
        self._shared_parameters = OrderedDict()
        self._policies_modules = [OrderedDict() for _ in range(n_policies)]
        self._policies_parameters = [OrderedDict() for _ in range(n_policies)]
        self._mixing_modules = OrderedDict()
        self._mixing_parameters = OrderedDict()
        self._final_policy_modules = OrderedDict()
        self._final_policy_parameters = OrderedDict()

        # ############# #
        # Shared Layers #
        # ############# #
        if input_norm:
            ln = nn.BatchNorm1d(in_size)
            self.sfc_input = ln
            self.add_shared_module("sfc_input", ln)
            self.__setattr__("sfc_input", ln)
        else:
            self.sfc_input = None

        if shared_hidden_sizes is not None:
            for ii, next_size in enumerate(shared_hidden_sizes):
                sfc = nn.Linear(in_size, next_size)
                ptu.layer_init(
                    layer=sfc,
                    option=hidden_w_init,
                    activation=hidden_activation,
                    b=hidden_b_init_val,
                )
                self.__setattr__("sfc{}".format(ii), sfc)
                self._sfcs.append(sfc)
                self.add_shared_module("sfc{}".format(ii), sfc)

                if self._shared_layer_norm:
                    ln = LayerNorm(next_size)
                    # ln = nn.BatchNorm1d(next_size)
                    self.__setattr__("sfc{}_norm".format(ii), ln)
                    self._sfc_norms.append(ln)
                    self.add_shared_module("sfc{}_norm".format(ii), ln)
                in_size = next_size

        # Get the output_size of the shared layers (assume same for all)
        multipol_in_size = in_size

        # ############### #
        # Unshared Layers #
        # ############### #
        # Unshared Multi-Policy Hidden Layers
        if unshared_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_hidden_sizes):
                for pol_idx in range(self._n_subpolicies):
                    pfc = nn.Linear(multipol_in_size, next_size)
                    ptu.layer_init(layer=pfc,
                                   option=hidden_w_init,
                                   activation=hidden_activation,
                                   b=hidden_b_init_val)
                    self.__setattr__("pfc{}_{}".format(pol_idx, ii), pfc)
                    self._pfcs[pol_idx].append(pfc)
                    self.add_policies_module("pfc{}_{}".format(pol_idx, ii),
                                             pfc,
                                             idx=pol_idx)

                    if self._policies_layer_norm:
                        ln = LayerNorm(next_size)
                        # ln = nn.BatchNorm1d(next_size)
                        self.__setattr__("pfc{}_{}_norm".format(pol_idx, ii),
                                         ln)
                        self._pfc_norms[pol_idx].append(ln)
                        self.add_policies_module("pfc{}_{}_norm".format(
                            pol_idx, ii),
                                                 ln,
                                                 idx=pol_idx)
                multipol_in_size = next_size

        # Multi-Policy Last Layers
        for pol_idx in range(self._n_subpolicies):
            last_pfc = nn.Linear(multipol_in_size, latent_dim)
            ptu.layer_init(layer=last_pfc,
                           option=output_w_init,
                           activation=pol_output_activation,
                           b=output_b_init_val)
            self.__setattr__("pfc{}_last".format(pol_idx), last_pfc)
            self._pfc_lasts.append(last_pfc)
            self.add_policies_module("pfc{}_last".format(pol_idx),
                                     last_pfc,
                                     idx=pol_idx)

        # ############# #
        # Mixing Layers #
        # ############# #
        mixture_in_size = in_size + latent_dim * self._n_subpolicies
        # Unshared Mixing-Weights Hidden Layers
        if unshared_mix_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_mix_hidden_sizes):
                mfc = nn.Linear(mixture_in_size, next_size)
                ptu.layer_init(
                    layer=mfc,
                    option=hidden_w_init,
                    activation=hidden_activation,
                    b=hidden_b_init_val,
                )
                self.__setattr__("mfc{}".format(ii), mfc)
                self._mfcs.append(mfc)
                # Add it to specific dictionaries
                self.add_mixing_module("mfc{}".format(ii), mfc)

                if self._mixture_layer_norm:
                    ln = LayerNorm(next_size)
                    # ln = nn.BatchNorm1d(next_size)
                    self.__setattr__("mfc{}_norm".format(ii), ln)
                    self._norm_mfcs.append(ln)
                    self.add_mixing_module("mfc{}_norm".format(ii), ln)
                mixture_in_size = next_size

        # Unshared Mixing-Weights Last Layers
        mfc_last = nn.Linear(mixture_in_size, latent_dim)
        ptu.layer_init(
            layer=mfc_last,
            option=output_w_init,
            activation=mix_output_activation,
            b=output_b_init_val,
        )
        self.__setattr__("mfc_last", mfc_last)
        self.mfc_last = mfc_last
        # Add it to specific dictionaries
        self.add_mixing_module("mfc_last", mfc_last)

        if softmax_weights:
            raise ValueError("Check whether applying a softmax here is correct")
            # self.mfc_softmax = nn.Softmax(dim=1)
        else:
            self.mfc_softmax = None

        # ################### #
        # Final Policy Layers #
        # ################### #
        final_pol_in_size = latent_dim
        if unshared_policy_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_policy_hidden_sizes):
                fpfc = nn.Linear(final_pol_in_size, next_size)
                ptu.layer_init(layer=fpfc,
                               option=hidden_w_init,
                               activation=hidden_activation,
                               b=hidden_b_init_val)
                self.__setattr__("fpfc{}".format(ii), fpfc)
                self._fpfcs.append(fpfc)
                # Add it to specific dictionaries
                self.add_final_policy_module("fpfc{}".format(ii), fpfc)

                if self._mixture_layer_norm:
                    ln = LayerNorm(next_size)
                    # ln = nn.BatchNorm1d(next_size)
                    self.__setattr__("fpfc{}_norm".format(ii), ln)
                    self._norm_fpfcs.append(ln)
                    self.add_final_policy_module("fpfc{}_norm".format(ii), ln)
                final_pol_in_size = next_size

        # Unshared Final Policy Last Layer
        fpfc_last = nn.Linear(final_pol_in_size, action_dim)
        ptu.layer_init(layer=fpfc_last,
                       option=output_w_init,
                       activation=final_pol_output_activation,
                       b=output_b_init_val)
        self.__setattr__("fpfc_last", fpfc_last)
        self.fpfc_last = fpfc_last
        # Add it to specific dictionaries
        self.add_final_policy_module("fpfc_last", fpfc_last)

        # ########## #
        # Std Layers #
        # ########## #
        # Multi-Policy Log-Stds Last Layers
        fpfc_last_log_std = nn.Linear(final_pol_in_size, action_dim)
        ptu.layer_init(layer=fpfc_last_log_std,
                       option=output_w_init,
                       activation=final_pol_output_activation,
                       b=output_b_init_val)
        self.__setattr__("fpfc_last_log_std", fpfc_last_log_std)
        self.fpfc_last_log_std = fpfc_last_log_std
        # Add it to specific dictionaries
        self.add_final_policy_module("fpfc_last_log_std", fpfc_last_log_std)

        self._normal_dist = Normal(loc=ptu.zeros(action_dim),
                                   scale=ptu.ones(action_dim))
        self._epsilon = epsilon

        self._pols_idxs = ptu.arange(self._n_subpolicies)
        self._compo_pol_idx = torch.tensor([self._n_subpolicies],
                                           dtype=torch.int64,
                                           device=ptu.device)
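
The size bookkeeping above is the main hint at how the pieces connect: the mixing layers are built with mixture_in_size = in_size + latent_dim * n_subpolicies, i.e. they consume the shared features concatenated with every sub-policy's latent output. A small shape-level sketch of that concatenation (sizes are illustrative assumptions; the combination rule is inferred from the layer sizes, not taken from the class's forward pass):

import torch
import torch.nn as nn

# Illustrative sizes only (assumptions, not the library's defaults).
batch, shared_feat_dim, latent_dim, n_subpolicies = 4, 64, 8, 3

shared_features = torch.randn(batch, shared_feat_dim)
# One latent output per sub-policy, e.g. from the pfc*_last layers above
subpolicy_latents = [torch.randn(batch, latent_dim) for _ in range(n_subpolicies)]

# The mixing layers see the shared features concatenated with all latents,
# matching mixture_in_size = in_size + latent_dim * n_subpolicies.
mixing_input = torch.cat([shared_features] + subpolicy_latents, dim=-1)
mfc = nn.Linear(shared_feat_dim + latent_dim * n_subpolicies, latent_dim)
mixed_latent = mfc(mixing_input)  # shape: (batch, latent_dim)
print(mixed_latent.shape)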
Example #6
    def __init__(self,
                 obs_dim,
                 action_dim,
                 n_policies,
                 shared_hidden_sizes=None,
                 unshared_hidden_sizes=None,
                 unshared_mix_hidden_sizes=None,
                 stds=None,
                 hidden_activation='relu',
                 hidden_w_init='xavier_normal',
                 hidden_b_init_val=1e-2,
                 output_w_init='xavier_normal',
                 output_b_init_val=1e-2,
                 pol_output_activation='linear',
                 mix_output_activation='linear',
                 input_norm=False,
                 shared_layer_norm=False,
                 policies_layer_norm=False,
                 mixture_layer_norm=False,
                 epsilon=1e-6,
        ):
        self.save_init_params(locals())
        super(TanhGaussianMixtureMultiPolicy, self).__init__()
        ExplorationPolicy.__init__(self, action_dim)

        self._input_size = obs_dim
        self._output_sizes = action_dim
        self._n_subpolicies = n_policies
        # Activation Fcns
        self._hidden_activation = ptu.get_activation(hidden_activation)
        self._pol_output_activation = ptu.get_activation(pol_output_activation)
        self._mix_output_activation = ptu.get_activation(mix_output_activation)
        # Normalization Layer Flags
        self._shared_layer_norm = shared_layer_norm
        self._policies_layer_norm = policies_layer_norm
        self._mixture_layer_norm = mixture_layer_norm
        # Layers Lists
        self._sfcs = []  # Shared Layers
        self._sfc_norms = []  # Norm. Shared Layers
        self._pfcs = [list() for _ in range(self._n_subpolicies)]  # Policies Layers
        self._pfc_norms = [list() for _ in range(self._n_subpolicies)]  # N. Pol. L.
        self._pfc_lasts = []  # Last Policies Layers
        self._mfcs = []  # Mixing Layers
        self._norm_mfcs = []  # Norm. Mixing Layers
        # self.mfc_last = None  # Below is instantiated

        # Initial size = Obs size
        in_size = self._input_size

        # Ordered Dictionaries for specific modules/parameters
        self._shared_modules = OrderedDict()
        self._shared_parameters = OrderedDict()
        self._policies_modules = [OrderedDict() for _ in range(n_policies)]
        self._policies_parameters = [OrderedDict() for _ in range(n_policies)]
        self._mixing_modules = OrderedDict()
        self._mixing_parameters = OrderedDict()

        # ############# #
        # Shared Layers #
        # ############# #
        if input_norm:
            ln = nn.BatchNorm1d(in_size)
            self.sfc_input = ln
            self.add_shared_module("sfc_input", ln)
        else:
            self.sfc_input = None

        if shared_hidden_sizes is not None:
            for ii, next_size in enumerate(shared_hidden_sizes):
                sfc = nn.Linear(in_size, next_size)
                ptu.layer_init(
                    layer=sfc,
                    option=hidden_w_init,
                    activation=hidden_activation,
                    b=hidden_b_init_val,
                )
                self.__setattr__("sfc{}".format(ii), sfc)
                self._sfcs.append(sfc)
                self.add_shared_module("sfc{}".format(ii), sfc)

                if self._shared_layer_norm:
                    ln = LayerNorm(next_size)
                    # ln = nn.BatchNorm1d(next_size)
                    self.__setattr__("sfc{}_norm".format(ii), ln)
                    self._sfc_norms.append(ln)
                    self.add_shared_module("sfc{}_norm".format(ii), ln)
                in_size = next_size

        # Get the output_size of the shared layers (assume same for all)
        multipol_in_size = in_size
        mixture_in_size = in_size

        # ############### #
        # Unshared Layers #
        # ############### #
        # Unshared Multi-Policy Hidden Layers
        if unshared_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_hidden_sizes):
                for pol_idx in range(self._n_subpolicies):
                    pfc = nn.Linear(multipol_in_size, next_size)
                    ptu.layer_init(
                        layer=pfc,
                        option=hidden_w_init,
                        activation=hidden_activation,
                        b=hidden_b_init_val
                    )
                    self.__setattr__("pfc{}_{}".format(pol_idx, ii), pfc)
                    self._pfcs[pol_idx].append(pfc)
                    self.add_policies_module("pfc{}_{}".format(pol_idx, ii),
                                             pfc, idx=pol_idx)

                    if self._policies_layer_norm:
                        ln = LayerNorm(next_size)
                        # ln = nn.BatchNorm1d(next_size)
                        self.__setattr__("pfc{}_{}_norm".format(pol_idx, ii),
                                         ln)
                        self._pfc_norms[pol_idx].append(ln)
                        self.add_policies_module("pfc{}_{}_norm".format(pol_idx,
                                                                        ii),
                                                 ln, idx=pol_idx)
                multipol_in_size = next_size

        # Multi-Policy Last Layers
        for pol_idx in range(self._n_subpolicies):
            last_pfc = nn.Linear(multipol_in_size, action_dim)
            ptu.layer_init(
                layer=last_pfc,
                option=output_w_init,
                activation=pol_output_activation,
                b=output_b_init_val
            )
            self.__setattr__("pfc{}_last".format(pol_idx), last_pfc)
            self._pfc_lasts.append(last_pfc)
            self.add_policies_module("pfc{}_last".format(pol_idx), last_pfc,
                                     idx=pol_idx)

        # Multi-Policy Log-Stds Last Layers
        self.stds = stds
        self.log_std = list()
        if stds is None:
            self._pfc_log_std_lasts = list()
            for pol_idx in range(self._n_subpolicies):
                last_pfc_log_std = nn.Linear(multipol_in_size, action_dim)
                ptu.layer_init(
                    layer=last_pfc_log_std,
                    option=output_w_init,
                    activation=pol_output_activation,
                    b=output_b_init_val
                )
                self.__setattr__("pfc{}_log_std_last".format(pol_idx),
                                 last_pfc_log_std)
                self._pfc_log_std_lasts.append(last_pfc_log_std)
                self.add_policies_module("pfc{}_log_std_last".format(pol_idx),
                                         last_pfc_log_std, idx=pol_idx)

        else:
            for std in stds:
                self.log_std.append(torch.log(std))
                assert LOG_SIG_MIN <= self.log_std[-1] <= LOG_SIG_MAX

        # ############# #
        # Mixing Layers #
        # ############# #
        # Unshared Mixing-Weights Hidden Layers
        if unshared_mix_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_mix_hidden_sizes):
                mfc = nn.Linear(mixture_in_size, next_size)
                ptu.layer_init(
                    layer=mfc,
                    option=hidden_w_init,
                    activation=hidden_activation,
                    b=hidden_b_init_val
                )
                self.__setattr__("mfc{}".format(ii), mfc)
                self._mfcs.append(mfc)
                # Add it to specific dictionaries
                self.add_mixing_module("mfc{}".format(ii), mfc)

                if self._mixture_layer_norm:
                    ln = LayerNorm(next_size)
                    # ln = nn.BatchNorm1d(next_size)
                    self.__setattr__("mfc{}_norm".format(ii), ln)
                    self._norm_mfcs.append(ln)
                    self.add_mixing_module("mfc{}_norm".format(ii), ln)
                mixture_in_size = next_size

        # Unshared Mixing-Weights Last Layers
        mfc_last = nn.Linear(mixture_in_size, self._n_subpolicies * action_dim)
        ptu.layer_init(
            layer=mfc_last,
            option=output_w_init,
            activation=mix_output_activation,
            b=output_b_init_val
        )
        self.__setattr__("mfc_last", mfc_last)
        self.mfc_last = mfc_last
        # Add it to specific dictionaries
        self.add_mixing_module("mfc_last", mfc_last)

        softmax_weights = True
        if softmax_weights:
            self.mfc_softmax = nn.Softmax(dim=1)
        else:
            self.mfc_softmax = None

        self._normal_dist = Normal(loc=ptu.zeros(action_dim),
                                   scale=ptu.ones(action_dim))
        self._epsilon = epsilon

        self._pols_idxs = ptu.arange(self._n_subpolicies)
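
Here the mixing head mfc_last outputs n_subpolicies * action_dim values and a Softmax(dim=1) is always applied, which points at per-dimension mixture weights over the sub-policies. A small self-contained sketch of how such weights could combine sub-policy means (sizes are illustrative, and the reshape-then-softmax combination is an assumption inferred from the layer sizes, not code from this class):

import torch
import torch.nn as nn

batch, action_dim, n_subpolicies = 4, 6, 3  # illustrative sizes (assumptions)

# Sub-policy means stacked as (batch, n_subpolicies, action_dim)
subpolicy_means = torch.randn(batch, n_subpolicies, action_dim)

# Raw mixing logits from a head like mfc_last: (batch, n_subpolicies * action_dim)
mix_logits = torch.randn(batch, n_subpolicies * action_dim)
mix_weights = mix_logits.view(batch, n_subpolicies, action_dim)
# Normalize across sub-policies for every action dimension
mix_weights = nn.Softmax(dim=1)(mix_weights)

# Weighted sum over sub-policies gives the composed mean
composed_mean = (mix_weights * subpolicy_means).sum(dim=1)  # (batch, action_dim)
print(composed_mean.shape)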