コード例 #1
0
class Policy(nn.Module):
    """Actor-critic policy: wraps a feature network and an action head.

    The wrapped network must return ``(value, actor_features, rnn_hxs)``
    from its forward pass and expose ``output_size``, ``is_recurrent``,
    ``recurrent_hidden_state_size`` and ``reset_noise()``.
    """

    def __init__(self, nn, action_space, noisy_net=False):
        # NOTE: the parameter name ``nn`` shadows the module-level torch.nn
        # alias inside this method, hence the fully qualified check below.
        super(Policy, self).__init__()

        assert isinstance(nn, torch.nn.Module)
        self.nn = nn

        # Pick the action-distribution head from the gym action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.nn.output_size,
                                    num_outputs,
                                    noisy=noisy_net)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.nn.output_size, num_outputs)
        else:
            raise NotImplementedError

    @property
    def is_recurrent(self):
        return self.nn.is_recurrent

    @property
    def recurrent_hidden_state_size(self):
        """Size of rnn_hx."""
        return self.nn.recurrent_hidden_state_size

    def forward(self, inputs, rnn_hxs, masks):
        raise NotImplementedError

    def act(self, inputs, rnn_hxs, masks, deterministic=False):
        """Sample an action (or take the mode when ``deterministic``)."""
        value, actor_features, rnn_hxs = self.nn(inputs, rnn_hxs, masks)
        dist = self.dist(actor_features)

        action = dist.mode() if deterministic else dist.sample()
        action_log_probs = dist.log_probs(action)
        # Fix: the original computed ``dist.entropy().mean()`` here and
        # discarded the result — dead work on every action step; removed.

        return value, action, action_log_probs, rnn_hxs

    def get_value(self, inputs, rnn_hxs, masks):
        """Return only the critic's value estimate."""
        value, _, _ = self.nn(inputs, rnn_hxs, masks)
        return value

    def evaluate_actions(self, inputs, rnn_hxs, masks, action):
        """Return value, log-prob of ``action`` and mean entropy (for the
        policy-gradient loss)."""
        value, actor_features, rnn_hxs = self.nn(inputs, rnn_hxs, masks)
        dist = self.dist(actor_features)

        action_log_probs = dist.log_probs(action)
        dist_entropy = dist.entropy().mean()

        return value, action_log_probs, dist_entropy, rnn_hxs

    def reset_noise(self):
        """Re-sample noise in noisy-net layers of both base and head."""
        self.nn.reset_noise()
        self.dist.reset_noise()
コード例 #2
0
    def __init__(self, num_inputs, action_space):
        """Build the CNN trunk plus value/V/Q heads.

        Args:
            num_inputs: number of input image channels (stacked frames).
            action_space: gym action space; Discrete or Box only.
        """
        super(CNNPolicy, self).__init__()
        # Conv stack producing 32 x 7 x 7 feature maps — TODO confirm the
        # expected input resolution against the caller.
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        # State-value head.
        self.critic_linear = nn.Linear(512, 1)

        # Extra V(s) and Q heads; the Q head's input is 512 + action_space.n,
        # which suggests features concatenated with a one-hot action —
        # confirm in forward().  Only valid for Discrete spaces (uses .n).
        self.V_linear_1 = nn.Linear(512, 20)
        self.V_linear_2 = nn.Linear(20, 1)
        self.Q_linear_1 = nn.Linear(512 + action_space.n, 20)
        self.Q_linear_2 = nn.Linear(20, 1)

        # Action-distribution head chosen by action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #3
0
    def __init__(self, num_inputs, action_space):
        """CNN actor-critic; activation function stored in ``act_func``."""
        super(CNNPolicy, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        # Batch-norm experiment left disabled:
        # self.conv1_bn = nn.BatchNorm2d(32)
        # self.conv2_bn = nn.BatchNorm2d(64)
        # self.conv3_bn = nn.BatchNorm2d(32)

        # Presumably applied in forward() — confirm; alternatives kept as
        # breadcrumbs in the trailing comment.
        self.act_func = F.leaky_relu  # F.tanh ##  F.elu F.relu F.softplus

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear = nn.Linear(512, 1)

        # Action-distribution head chosen by action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #4
0
    def __init__(self, nn, action_space, noisy_net=False):
        """Wrap feature network ``nn`` and attach a distribution head.

        The parameter ``nn`` shadows the torch.nn alias here, hence the
        fully qualified isinstance check.  ``nn`` must expose
        ``output_size``; ``noisy_net`` only affects the Discrete head.
        """
        super(Policy, self).__init__()

        assert isinstance(nn, torch.nn.Module)
        self.nn = nn

        # Distribution head chosen by action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.nn.output_size,
                                    num_outputs,
                                    noisy=noisy_net)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.nn.output_size, num_outputs)
        else:
            raise NotImplementedError
コード例 #5
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 recurrent_policy,
                 hidden_size,
                 args):
        """Actor-critic policy with a CNN or MLP base chosen by obs rank."""

        super(Policy, self).__init__()
        # 3-D observations (C, H, W) get a CNN base; flat vectors an MLP.
        if len(obs_shape) == 3:
            self.base = CNNBase(obs_shape[0], recurrent_policy)
        elif len(obs_shape) == 1:
            assert not recurrent_policy, \
                "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0], hidden_size, args)
        else:
            raise NotImplementedError

        # Distribution head selected by action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        self.state_size = self.base.state_size
        self.leaky = args.leaky
        # Starts at 1.0; the semantics of ``scale`` depend on code outside
        # this view — confirm where it is consumed.
        self.scale = 1.
コード例 #6
0
    def __init__(
        self,
        observability: str,
        option_dims: int,
        option_space: str,
        # obs_spaces: collections.OrderedDict,
        hidden_size: int,
        base_model: str,
        base_kwargs: Dict,
        # policy_base_kwargs: Dict,
    ):
        """Option-level policy over a discrete or continuous option space.

        Options play the role of the parent's actions
        (``action_dims=option_dims``).  NOTE(review): ``hidden_size`` is
        currently unused here — only the commented-out fc12 referenced it.
        """
        super().__init__(
            observability=observability,
            action_dims=option_dims,
            base_model=base_model,
            base_kwargs=base_kwargs,
        )
        assert option_space in ['continuous', 'discrete']
        self.option_space = option_space

        if self.option_space == 'continuous':
            # Overwrite Policy class attributes
            # self.fc12 = init_(nn.Linear(hidden_size,
            #     2 * omega_option_dims))
            # Replace the parent's distribution head with a diag-Gaussian.
            self.dist = DiagGaussian(self.base.output_size, option_dims)
コード例 #7
0
ファイル: policy_network.py プロジェクト: yes7rose/DEHRL
    def __init__(self, obs_shape, action_space, one_hot, hid_size, recurrent_policy, label):
        """Hierarchical-RL policy: CNN/MLP base plus a distribution head.

        ``hid_size`` and ``label`` are stored for use elsewhere; the
        commented-out attributes are historical breadcrumbs.
        """
        super(EHRL_Policy, self).__init__()

        self.hid_size = hid_size
        self.label = label
        # self.num_hid_layers = num_hid_layers
        # self.num_subpolicies = num_subpolicies
        # self.gaussian_fixed_var = gaussian_fixed_var

        # Image observations get a CNN base, flat vectors an MLP base.
        if len(obs_shape) == 3:
            self.base = CNNBase(obs_shape[0], one_hot, self.hid_size, recurrent_policy)
        elif len(obs_shape) == 1:
            assert not recurrent_policy, \
                "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0], one_hot, self.hid_size)
        else:
            raise NotImplementedError

        # Distribution head chosen from the action-space type.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        self.state_size = self.base.state_size
コード例 #8
0
    def __init__(self, num_inputs, action_space):
        """MLP actor-critic with running input normalization.

        Separate 64-unit actor (a_fc*) and critic (v_fc*) towers.
        """
        super(MLPPolicy, self).__init__()

        self.action_space = action_space

        # Online (Welford-style) input normalization module.
        self.input_norm = WelfordNormalization(num_inputs)

        # Actor tower.
        self.a_fc1 = nn.Linear(num_inputs, 64)
        self.a_fc2 = nn.Linear(64, 64)

        # Critic tower ending in a scalar value.
        self.v_fc1 = nn.Linear(num_inputs, 64)
        self.v_fc2 = nn.Linear(64, 64)
        self.v_fc3 = nn.Linear(64, 1)

        # Distribution head over the actor's 64-d features.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #9
0
    def __init__(self, num_inputs, action_space, num_heads=1, hidden_size=512):
        """Multi-head CNN policy: one representation+critic pair per head,
        all sharing a single action-distribution head."""
        super(CNNPolicy, self).__init__()
        self.num_heads = num_heads

        # Built as plain lists first, then wrapped in nn.ModuleList below so
        # their parameters are registered with the module.
        self.representations = []
        self.critics = []
        for _ in range(num_heads):
            self.representations.append(
                self.build_representation(num_inputs, hidden_size=hidden_size))
            self.critics.append(self.build_critic(hidden_size, 1))

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(hidden_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(hidden_size, num_outputs)
        else:
            raise NotImplementedError

        self.critics = nn.ModuleList(self.critics)
        self.representations = nn.ModuleList(self.representations)

        # Single optimizer param group covering every registered parameter.
        self.param_groups = [list(self.parameters())]

        self.train()
        self.reset_parameters()
コード例 #10
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Policy with a pluggable base; default base picked from obs rank."""
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        # Default base class: CNN for image observations, MLP for vectors.
        if base is None:
            if len(obs_shape) == 3:
                base = CNNBase
            elif len(obs_shape) == 1:
                base = MLPBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            # NOTE(review): hard-codes 512 instead of self.base.output_size
            # (unlike the Box/MultiBinary branches) — breaks if the base's
            # feature size is not 512; confirm intent.
            num_action_outputs = 512
            self.dist = Categorical(num_action_outputs,num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
コード例 #11
0
    def __init__(self, num_inputs, action_space, use_gru):
        """Four-layer CNN policy with optional GRU memory."""
        super(CNNPolicy, self).__init__()

        #print('num_inputs=%s' % str(num_inputs))

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=2)
        self.conv2 = nn.Conv2d(32, 32, 4, stride=2)
        self.conv3 = nn.Conv2d(32, 32, 4, stride=2)
        self.conv4 = nn.Conv2d(32, 32, 4, stride=1)

        self.linear1 = nn.Linear(32 * 2 * 2, 256)

        if use_gru:
            # NOTE(review): GRUCell(512, 512) does not match the 256-d
            # features from linear1 or the 256-d critic input below — looks
            # like a stale size; confirm against forward().
            self.gru = nn.GRUCell(512, 512)

        self.critic_linear = nn.Linear(256, 1)

        # Distribution head over the 256-d features.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(256, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(256, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #12
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 recurrent_policy=False,
                 dataset=None,
                 resnet=False,
                 pretrained=False):
        """Policy net for image-dataset RL tasks.

        NOTE(review): ``resnet`` and ``pretrained`` are unused in this
        constructor — possibly consumed elsewhere; confirm.
        """
        super(myNet, self).__init__()
        self.dataset = dataset
        if len(obs_shape) == 3:  #our mnist case
            self.base = model.CNNBase(obs_shape[0],
                                      recurrent_policy,
                                      dataset=dataset)
        elif len(obs_shape) == 1:
            assert not recurrent_policy, \
             "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0])
        else:
            raise NotImplementedError

        if action_space.__class__.__name__ == "Discrete":  # our case
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        # Auxiliary classification head (binary; 10-way left commented out).
        if dataset in ['mnist', 'cifar10']:
            self.clf = Categorical(self.base.output_size, 2)  #10)

        self.state_size = self.base.state_size
コード例 #13
0
    def __init__(self, num_inputs, action_space):
        """Deep leaky-ReLU MLP: shared trunk, value head, policy head."""
        super(BPW_MLPPolicy, self).__init__()

        self.action_space = action_space

        # Shared trunk: 256 -> 256 -> 128 -> 128, each with LeakyReLU(0.1).
        self.fc1 = nn.Linear(num_inputs, 256)
        self.lrelu1 = nn.LeakyReLU(0.1)
        self.fc2 = nn.Linear(256, 256)
        self.lrelu2 = nn.LeakyReLU(0.1)
        self.fc3 = nn.Linear(256, 128)
        self.lrelu3 = nn.LeakyReLU(0.1)
        self.fc4 = nn.Linear(128, 128)
        self.lrelu4 = nn.LeakyReLU(0.1)

        # Scalar value head and a 64-d policy-feature head.
        self.value = nn.Linear(128, 1)
        self.policy = nn.Linear(128, 64)
        self.lrelu_policy = nn.LeakyReLU(0.1)

        # Distribution head over the 64-d policy features.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #14
0
ファイル: model.py プロジェクト: hantek/baby-ai-game
    def __init__(self, num_inputs, action_space, use_gru):
        """Small-kernel CNN policy with optional GRU memory."""
        super(CNNPolicy, self).__init__()

        # Three 2x2 stride-1 convs; assumes an input size that yields
        # 4x4 feature maps — TODO confirm against the environment.
        self.conv1 = nn.Conv2d(num_inputs, 32, 2, stride=1)
        self.conv2 = nn.Conv2d(32, 32, 2, stride=1)
        self.conv3 = nn.Conv2d(32, 32, 2, stride=1)

        self.linear1 = nn.Linear(32 * 4 * 4, 512)

        if use_gru:
            self.gru = nn.GRUCell(512, 512)

        #self.lstm.register_forward_hook(printstat)

        self.critic_linear = nn.Linear(512, 1)

        # Distribution head over the 512-d features.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #15
0
    def __init__(self, num_inputs, hidden_size, num_outputs):
        """Two-layer MLP trunk feeding a diagonal-Gaussian action head."""
        super(Policy, self).__init__()

        # Trunk: ReLU-initialized first layer, tanh-initialized second.
        trunk_layers = [
            init_relu(nn.Linear(num_inputs, hidden_size)),
            nn.ReLU(),
            init_tanh(nn.Linear(hidden_size, hidden_size)),
            nn.Tanh(),
        ]
        self.base = nn.Sequential(*trunk_layers)

        # Continuous-action head over the trunk's features.
        self.dist = DiagGaussian(hidden_size, num_outputs)
コード例 #16
0
    def __init__(self,
                 num_inputs,
                 action_space,
                 num_heads=1,
                 reward_predictor=False,
                 use_s=True,
                 use_s_a=False,
                 use_s_a_sprime=False):
        """MLP policy with a multi-output critic and optional reward model.

        At most one of use_s / use_s_a / use_s_a_sprime may be set; it
        selects the reward predictor's input: s, (s, a) or (s, a, s').
        """
        # NOTE: this assert runs before super().__init__().
        assert use_s + use_s_a + use_s_a_sprime <= 1
        super(MLPPolicy, self).__init__()

        self.use_s = use_s
        self.use_s_a = use_s_a
        self.use_s_a_sprime = use_s_a_sprime

        self.num_heads = num_heads

        self.action_space = action_space

        # Actor trunk.
        self.a_fc1 = nn.Linear(num_inputs, 64)
        self.a_fc2 = nn.Linear(64, 64)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        else:
            raise NotImplementedError

        # One critic network with num_heads outputs; its parameters get
        # their own optimizer group below.
        self.critics = []
        self.param_groups = [list(self.parameters())]

        cur_critic = self.build_critic(num_inputs,
                                       num_outputs=num_heads,
                                       hidden_size=64)
        self.critics.append(cur_critic)

        self.critics = nn.ModuleList(self.critics)

        for critic in list(self.critics):
            self.param_groups.append(list(critic.parameters()))

        if reward_predictor:
            # Predictor input size depends on the conditioning flags.
            if self.use_s:
                r_hat_input_size = num_inputs
            elif self.use_s_a:
                r_hat_input_size = num_inputs + num_outputs
            else:
                r_hat_input_size = num_inputs * 2 + num_outputs

            self.rp = self.build_critic(r_hat_input_size,
                                        num_outputs=1,
                                        hidden_size=64)
            self.param_groups.append(list(self.rp.parameters()))
        self.train()
        self.reset_parameters()
コード例 #17
0
ファイル: encoders.py プロジェクト: alvinzz/LRMBMRL
    def __init__(
        self,
        name,
        ob_dim,
        latent_dim,
        in_layer=None,
        out_activation=None,
        hidden_dims=None,
        hidden_activation=tf.nn.tanh,
        weight_init=tf.contrib.layers.xavier_initializer,
        bias_init=tf.zeros_initializer,
        reuse_scope=False,
    ):
        """Gaussian encoder (TF1): builds mean and log-variance MLPs over
        the observation and samples ``self.zs`` from the resulting
        DiagGaussian.

        Args:
            name: variable-scope name.
            ob_dim: observation dimensionality.
            latent_dim: latent dimensionality.
            in_layer: optional tensor used as input instead of a new
                placeholder.
            hidden_dims: hidden layer sizes; defaults to [64, 64, 64].
        """
        # Fix: the original declared the mutable default
        # ``hidden_dims=[64, 64, 64]``, shared across every call; use the
        # None sentinel and a fresh per-call list instead.
        if hidden_dims is None:
            hidden_dims = [64, 64, 64]
        with tf.variable_scope(name, reuse=reuse_scope):
            # Use the provided input tensor if any, else a placeholder.
            if in_layer is None:
                self.obs = tf.placeholder(tf.float32,
                                          shape=[None, ob_dim],
                                          name='obs')
            else:
                self.obs = in_layer

            # Mean network.
            self.mean_network = MLP('means',
                                    ob_dim,
                                    latent_dim,
                                    out_activation=out_activation,
                                    hidden_dims=hidden_dims,
                                    hidden_activation=hidden_activation,
                                    weight_init=weight_init,
                                    bias_init=bias_init,
                                    in_layer=self.obs)
            self.means = self.mean_network.layers['out']

            # Log-variance network (same architecture, separate weights).
            self.log_var_network = MLP('log_vars',
                                       ob_dim,
                                       latent_dim,
                                       out_activation=out_activation,
                                       hidden_dims=hidden_dims,
                                       hidden_activation=hidden_activation,
                                       weight_init=weight_init,
                                       bias_init=bias_init,
                                       in_layer=self.obs)
            self.log_vars = self.log_var_network.layers['out']

            # Diagonal-Gaussian latent distribution and a sampled latent.
            self.distribution = DiagGaussian(self.means, self.log_vars)
            self.zs = self.distribution.sample()
コード例 #18
0
 def build_dist(self, action_space):
     """Return the distribution head matching ``action_space``.

     Categorical for Discrete spaces, DiagGaussian for Box; any other
     space type raises NotImplementedError.
     """
     if isinstance(action_space, Discrete):
         num_outputs = action_space.n
         return Categorical(self.recurrent_module.output_size, num_outputs)
     elif isinstance(action_space, Box):
         num_outputs = action_space.shape[0]
         return DiagGaussian(self.recurrent_module.output_size, num_outputs)
     else:
         raise NotImplementedError
コード例 #19
0
    def __init__(self, obs_shape, action_space, base_kwargs=None, activation=1, modulation=False):
        """Actor-critic policy: CNN base for image obs, MLP base for vectors."""
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        # Pick the feature extractor from the observation rank.
        obs_rank = len(obs_shape)
        if obs_rank == 3:
            self.base = CNNBase(obs_shape[0], activation = activation, modulation=modulation, **base_kwargs)
        elif obs_rank == 1:
            self.base = MLPBase(obs_shape[0], **base_kwargs)
        else:
            raise NotImplementedError

        # Pick the action-distribution head from the action-space type.
        space_kind = type(action_space).__name__
        if space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
        else:
            raise NotImplementedError
コード例 #20
0
    def __init__(self, num_inputs, action_space):
        """CNN actor-critic (distribution head built before the convs)."""
        super(CNNPolicy, self).__init__()

        # Distribution head over the 512-d features produced by linear1.
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError
        self.num_inputs = num_inputs  #num of stacked frames
        self.num_outputs = num_outputs  #action size

        # Conv stack producing 32 x 7 x 7 feature maps.
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear = nn.Linear(512, 1)
コード例 #21
0
    def __init__(self, obs_shape, action_space, base_kwargs=None):
        """Continuous-control policy: MLP base plus a diag-Gaussian head."""
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        # Only flat (1-D) observation vectors are supported.
        if len(obs_shape) != 1:
            raise NotImplementedError
        self.base = MLPBase(obs_shape[0], **base_kwargs)

        # Gaussian head sized by the Box action space.
        self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
コード例 #22
0
    def __init__(self, num_inputs, action_space, use_gru, act_func):
        """CNN actor-critic with a selectable activation scheme.

        ``C`` is a channel multiplier applied to pre-activation widths
        (C=2 for maxout, 1 otherwise).
        """
        super(CNNPolicy, self).__init__()

        self.act_func = act_func
        self.acti = None
        ############## SETTING ACTIVATION FUNCTION STUFF ###################
        if act_func == 'relu':
            C = 1
            print(">> ||| USING RELU ACTIVATION FUNCTION ||| <<")
        elif act_func == 'maxout':
            C = 2
            self.acti = maxout
            print(">> ||| USING maxout ACTIVATION FUNCTION ||| <<")
        elif act_func == 'lwta':
            C = 1
            self.acti = lwta
            print(">> ||| USING LWTA ACTIVATION FUNCTION ||| <<")
        else:
            # Fix: an unrecognized act_func previously fell through and
            # crashed below with NameError on ``C``; fail fast instead.
            raise ValueError("unsupported act_func: %r" % (act_func,))

        print(C)

        # NOTE(review): conv2/conv3 expect 32/64 input channels while
        # conv1/conv2 emit 32*C/64*C — consistent only if the activation
        # reduces channels by a factor of C (as maxout with C=2 would);
        # confirm against forward().
        self.conv1 = nn.Conv2d(num_inputs, 32*C, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64*C, 4, stride=2)

        self.conv3 = nn.Conv2d(64, 32*C, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        #if use_att:
        #    self.att = att(256, 256)

        if use_gru:
            self.gru = nn.GRUCell(512, 256)

        self.critic_linear = nn.Linear(256, 1)

        if action_space.__class__.__name__ == "Discrete":
            # HARCODED CHAGING
            num_outputs = action_space.n
            self.dist = Categorical(256, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            #print("Sampling from Box")
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(256, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #23
0
    def __init__(self, num_inputs, action_space, args):
        """Small conv policy for a board game: four padded 3x3 conv+BN
        blocks, two FC+BN layers, then actor/critic heads."""
        super(PommeCNNPolicySmall, self).__init__()
        self.args = args

        # 3x3, stride-1, padding-1 convs preserve the board resolution.
        self.conv1 = nn.Conv2d(num_inputs,
                               args.num_channels,
                               3,
                               stride=1,
                               padding=1)
        self.conv2 = nn.Conv2d(args.num_channels,
                               args.num_channels,
                               3,
                               stride=1,
                               padding=1)
        self.conv3 = nn.Conv2d(args.num_channels,
                               args.num_channels,
                               3,
                               stride=1,
                               padding=1)
        self.conv4 = nn.Conv2d(args.num_channels,
                               args.num_channels,
                               3,
                               stride=1,
                               padding=1)

        self.bn1 = nn.BatchNorm2d(args.num_channels)
        self.bn2 = nn.BatchNorm2d(args.num_channels)
        self.bn3 = nn.BatchNorm2d(args.num_channels)
        self.bn4 = nn.BatchNorm2d(args.num_channels)

        # XXX: or should it go straight to 512?
        self.fc1 = nn.Linear(
            args.num_channels * (args.board_size) * (args.board_size), 1024)
        self.fc_bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(1024, 512)
        self.fc_bn2 = nn.BatchNorm1d(512)

        self.critic_linear = nn.Linear(512, 1)
        # NOTE(review): actor_linear outputs a single unit while the action
        # distribution is built separately below — confirm how forward()
        # uses it.
        self.actor_linear = nn.Linear(512, 1)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #24
0
    def __init__(self, obs_shape, action_shape, sensor_type,
                 atom_num_inputs_o_env, dim, num_agents, unordered,
                 independent, sigmoid, share, no_rnn):
        """Multi-agent RNN policy with a diagonal-Gaussian action head."""
        super(Policy, self).__init__()

        # Base network
        assert len(obs_shape) == 1, "We only handle flattened input."
        self.base = RNNBase(obs_shape[0], sensor_type, atom_num_inputs_o_env,
                            dim, num_agents, unordered, independent, share,
                            no_rnn)

        # Actor's final layer: the third DiagGaussian argument is 1 in the
        # independent case, num_agents otherwise (project-specific
        # signature; confirm its meaning in the DiagGaussian definition).
        num_outputs = action_shape[0]
        if independent:
            self.dist = DiagGaussian(self.base.output_size, num_outputs, 1,
                                     sigmoid)
        else:
            self.dist = DiagGaussian(self.base.output_size, num_outputs,
                                     num_agents, sigmoid)

        self.state_size = self.base.state_size
        self.sigmoid = sigmoid
0
ファイル: model.py プロジェクト: maximilianigl/rl-msol
    def __init__(self, action_space, architecture):
        """Distribution head on top of an encoder of known output size."""
        super(Policy, self).__init__()

        self.encoder_output_size = architecture['encoder_output_size']

        space_kind = type(action_space).__name__
        if space_kind == "Discrete":
            # The discrete path also records the action count on the instance.
            self.num_outputs = action_space.n
            self.dist = Categorical(self.encoder_output_size, self.num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.encoder_output_size, num_outputs)
        else:
            raise NotImplementedError
        self.train()
コード例 #26
0
ファイル: model.py プロジェクト: MoMe36/MultiProcessGymEnv
    def __init__(self, obs_shape, action_space, base_kwargs=None):
        """MLP policy with a distribution head sized by the action space.

        NOTE(review): ``base_kwargs`` is normalized below but never
        forwarded to MLPBase — possibly an oversight; confirm.
        """
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        self.base = MLPBase(obs_shape[0])

        # Head sized from the base's hidden width (this project exposes
        # hidden_size rather than output_size).
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.hidden_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.hidden_size, num_outputs)
        else:
            raise NotImplementedError
コード例 #27
0
ファイル: model.py プロジェクト: MoMe36/MultiProcessGymEnv
    def __init__(self, obs_size, ac_size, inner_size):
        """Gaussian actor and a separate critic, both small tanh MLPs."""
        super().__init__()

        def init_(m):
            # Weight init helper: init_normc_ weights, constant-zero bias.
            return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

        # Actor: one hidden tanh layer feeding a diagonal-Gaussian head.
        self.actor_base = nn.Sequential(
            init_(nn.Linear(obs_size, inner_size)),
            nn.Tanh(),
        )
        self.actor_out = DiagGaussian(inner_size, ac_size)

        # Critic: two hidden tanh layers and a scalar value output.
        self.critic = nn.Sequential(
            init_(nn.Linear(obs_size, inner_size)),
            nn.Tanh(),
            init_(nn.Linear(inner_size, inner_size)),
            nn.Tanh(),
            init_(nn.Linear(inner_size, 1)),
        )
コード例 #28
0
    def __init__(self, num_inputs, action_space, use_gru, use_vae, 
        use_batch_norm=False, use_residual=False, distribution = 'DiagGaussian'):
        """CNN policy with optional GRU memory and an optional VAE branch.

        When ``use_vae`` is set, mean/log-var projections and a mirrored
        deconvolution stack are created for observation reconstruction.
        """
        super(CNNPolicy, self).__init__()
        print('num_inputs=%s' % str(num_inputs))

        # Encoder: five CNN blocks down to 32 x 10 x 8 feature maps.
        self.conv1 = CNNBlock(num_inputs, 32, 7, 2, 3, use_batch_norm, False)
        self.conv2 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
        self.conv3 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
        self.conv4 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
        self.conv5 = CNNBlock(32, 32, 3, 1, 1, use_batch_norm, use_residual)

        self.linear1_drop = nn.Dropout(p=0.3)
        self.linear1 = nn.Linear(32 * 10 * 8, 256)

        self.gruhdim = 256
        if use_gru:
            self.gru = nn.GRUCell(self.gruhdim, self.gruhdim)

        self.use_vae = use_vae
        if use_vae:
            # VAE head: mean/variance projections plus a mirrored decoder.
            self.linearmean = nn.Linear(256, 256)
            self.linearvar = nn.Linear(256, 256)
            self.unlinearlatent = nn.Linear(256, 256)
            self.unlinear1 = nn.Linear(256, 32 * 10 * 8)

            self.unconv5 = CNNUnBlock(32, 32, 3, 1, 1, 0, use_batch_norm, use_residual)
            self.unconv4 = CNNUnBlock(32, 32, 3, 2, (1,1), (1,0), use_batch_norm, use_residual)
            self.unconv3 = CNNUnBlock(32, 32, 3, 2, 1, 1, use_batch_norm, use_residual)
            self.unconv2 = CNNUnBlock(32, 32, 3, 2, 1, 1, use_batch_norm, use_residual)
            self.unconv1 = CNNUnBlock(num_inputs, 32, 7, 2, 3, 1, False, False)

        self.critic_linear = nn.Linear(256, 1)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(256, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            # NOTE(review): an unrecognized ``distribution`` string leaves
            # self.dist unset here — confirm callers only pass the two
            # supported values.
            if distribution == 'DiagGaussian':
                self.dist = DiagGaussian(256, num_outputs)
            elif distribution == 'MixedDistribution':
                self.dist = MixedDistribution(256, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
コード例 #29
0
    def __init__(self, obs_shape, action_space, base_kwargs=None):
        """Gaussian state generator: the base consumes obs+action dims and
        the head outputs obs-sized values."""
        super(StateGen, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        # Only flat observation vectors are supported; the base's input is
        # the concatenation of observation and action dimensions.
        if len(obs_shape) == 1:
            self.base = StateMLPBase(obs_shape[0] + action_space.shape[0],
                                     **base_kwargs)
        else:
            raise NotImplementedError

        # Continuous (Box) actions only; the head's output dim is the
        # observation dim, not the action dim.
        if action_space.__class__.__name__ == "Box":
            num_outputs = obs_shape[0]
            self.dist = DiagGaussian(self.base.hidden_size, num_outputs)
        else:
            raise NotImplementedError
コード例 #30
0
    def __init__(self, num_inputs, action_space, act_func, drop, num_updates):
        """MLP actor-critic with a selectable activation scheme.

        ``C`` widens pre-activation layers (C=2 for maxout, 1 otherwise).
        """
        super(MLPPolicy, self).__init__()
        self.drop = drop
        self.act_func = act_func
        self.num_updates = num_updates
        self.counter = num_updates
        ############## SETTING ACTIVATION FUNCTION STUFF ###################
        if act_func == 'tanh':
            C = 1
            # NOTE(review): unlike the other branches this one never sets
            # self.acti — presumably forward() special-cases 'tanh';
            # confirm against the forward pass.
            print(">> ||| USING tanh ACTIVATION FUNCTION ||| <<")
        elif act_func == 'maxout':
            C = 2
            self.acti = maxout
            print(">> ||| USING maxout ACTIVATION FUNCTION ||| <<")
        elif act_func == 'lwta':
            self.acti = lwta
            C = 1
            print(">> ||| USING LWTA ACTIVATION FUNCTION ||| <<")
        elif act_func == 'relu':
            self.acti = F.relu
            C = 1
            print(">> ||| USING RELU ACTIVATION FUNCTION ||| <<")
        else:
            # Fix: an unknown act_func previously fell through and crashed
            # below with NameError on ``C``; fail fast instead.
            raise ValueError("unsupported act_func: %r" % (act_func,))

        print(C)


        self.action_space = action_space

        # NOTE(review): a_fc2/v_fc2 take 64 inputs while the previous layer
        # emits 64*C — consistent only if the activation halves features
        # (maxout with C=2); confirm against forward().
        self.a_fc1 = nn.Linear(num_inputs, 64*C)
        self.a_fc2 = nn.Linear(64, 64*C)

        self.v_fc1 = nn.Linear(num_inputs, 64*C)
        self.v_fc2 = nn.Linear(64, 64*C)
        self.v_fc3 = nn.Linear(64, 1)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()