Exemple #1
0
    def __init__(self,img_params,model_params,latent_params):
        """Build the priors, the Gumbel sampler, the encoder and the decoder.

        Args:
            img_params: mapping read for 'image_dim' and 'image_size'.
            model_params: mapping read for 'n_downsample', 'dim', 'n_res',
                'norm', 'activ', 'pad_type', 'n_mlp' and 'mlp_dim'.
            latent_params: mapping read for 'continious' and 'categorical'
                (key spelling as expected from callers); it is also passed
                unchanged to CatEncoder.
        """
        super(CatVAE, self).__init__()

        # Read the configuration in a fixed key order.
        image_dim, image_size = [
            img_params[key] for key in ('image_dim', 'image_size')]
        (n_downsample, dim, n_res, norm,
         activ, pad_type, n_mlp, mlp_dim) = [
            model_params[key]
            for key in ('n_downsample', 'dim', 'n_res', 'norm',
                        'activ', 'pad_type', 'n_mlp', 'mlp_dim')]

        # Continuous latent part and its prior.
        cont_dim = latent_params['continious']
        self.continious_dim = cont_dim
        self.prior_cont = Gaussian(cont_dim)

        # Categorical latent part: prior plus Gumbel sampler of the same size.
        catg_dim = latent_params['categorical']
        self.categorical_dim = catg_dim
        self.prior_catg = Categorical(catg_dim)
        self.gumbel = Gumbel(catg_dim)

        self.encoder = CatEncoder(n_downsample, n_res, n_mlp, image_size,
                                  image_dim, dim, mlp_dim, latent_params,
                                  norm, activ, pad_type)

        # The decoder receives both latent parts side by side and a spatial
        # size of image_size floor-divided by 2**n_downsample.
        self.decoder = Decoder(n_downsample, n_res, n_mlp,
                               cont_dim + catg_dim, mlp_dim,
                               image_size // (2 ** n_downsample),
                               dim, image_dim, norm, activ, pad_type)
Exemple #2
0
    def test_categorical():
        """Compare Categorical(3).kl and .logli with NumPy reference values."""
        eps = 1e-8
        dist = Categorical(3)

        # Same call order as before (new first, then old) so anything random
        # inside random_softmax is consumed identically.
        p_new = np.array([random_softmax(3), random_softmax(3)], )
        p_old = np.array([random_softmax(3), random_softmax(3)], )

        # One-hot samples: class 1 for the first row, class 2 for the second.
        one_hot = np.array([[0, 1, 0], [0, 0, 1]], dtype=np.float32)

        info_new = {'prob': tf.constant(p_new)}
        info_old = {'prob': tf.constant(p_old)}
        samples = tf.constant(one_hot)

        # KL of a distribution against itself is zero.
        self_kl = dist.kl(info_new, info_new).eval(session=sess)
        np.testing.assert_allclose(self_kl, np.array([0., 0.]))

        # KL(old || new) against the closed-form NumPy expression.
        log_ratio = np.log(p_old + eps) - np.log(p_new + eps)
        cross_kl = dist.kl(info_old, info_new).eval(session=sess)
        np.testing.assert_allclose(cross_kl, np.sum(p_old * log_ratio, axis=-1))

        # Log-likelihood of each one-hot sample picks the matching probability.
        log_lik = dist.logli(samples, info_old).eval(session=sess)
        expected = [np.log(p_old[0][1] + eps), np.log(p_old[1][2] + eps)]
        np.testing.assert_allclose(log_lik, expected, rtol=1e-5)
Exemple #3
0
    def __init__(self, num_inputs, action_space):
        """Create the conv layers, the critic head and the action distribution.

        Args:
            num_inputs: number of input channels of the first conv layer.
            action_space: its class name selects the distribution: "Discrete"
                gives a Categorical over ``action_space.n``, "Box" gives a
                DiagGaussian over ``action_space.shape[0]``. Any other value
                is passed as-is as the Categorical output count.
        """
        super(CNNPolicy, self).__init__()

        feature_dim = 512  # width used by linear1, the critic and the dist head

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.act_func = F.leaky_relu

        self.linear1 = nn.Linear(32 * 7 * 7, feature_dim)
        self.critic_linear = nn.Linear(feature_dim, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(feature_dim, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(feature_dim, action_space.n)
        else:
            # Unrecognised spaces are not rejected: the value itself is used
            # as the number of Categorical outputs.
            self.dist = Categorical(feature_dim, action_space)

        self.train()
        self.reset_parameters()
    def __init__(self,
                 obs_shape,
                 action_space,
                 recurrent_policy=False,
                 dataset=None,
                 resnet=False,
                 pretrained=False):
        """Pick a base network from the observation rank and attach the heads.

        Args:
            obs_shape: observation shape; length 3 selects ``model.CNNBase``,
                length 1 selects ``MLPBase``.
            action_space: "Discrete" or "Box" (matched by class name).
            recurrent_policy: forwarded to the CNN base; must be falsy for
                the MLP base.
            dataset: stored on the instance; 'mnist' or 'cifar10' adds a
                2-way ``clf`` head.
            resnet, pretrained: accepted but not read by this constructor.

        Raises:
            NotImplementedError: for any other observation rank or action
                space class.
        """
        super(myNet, self).__init__()
        self.dataset = dataset

        obs_rank = len(obs_shape)
        if obs_rank == 1:
            assert not recurrent_policy, \
             "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0])
        elif obs_rank == 3:
            self.base = model.CNNBase(obs_shape[0],
                                      recurrent_policy,
                                      dataset=dataset)
        else:
            raise NotImplementedError

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        else:
            raise NotImplementedError

        # Extra two-way classification head for the image datasets.
        if dataset in ['mnist', 'cifar10']:
            self.clf = Categorical(self.base.output_size, 2)

        self.state_size = self.base.state_size
Exemple #5
0
    def __init__(self,
                 obs_shape,
                 action_space,
                 recurrent_policy,
                 hidden_size,
                 args):
        """Pick a base network from the observation rank and attach a dist head.

        Args:
            obs_shape: observation shape; length 3 selects ``CNNBase``,
                length 1 selects ``MLPBase``.
            action_space: "Discrete" or "Box" (matched by class name).
            recurrent_policy: forwarded to the CNN base; must be falsy for
                the MLP base.
            hidden_size: forwarded to ``MLPBase`` only.
            args: forwarded to ``MLPBase``; ``args.leaky`` is stored.

        Raises:
            NotImplementedError: for any other observation rank or action
                space class.
        """
        super(Policy, self).__init__()

        obs_rank = len(obs_shape)
        if obs_rank == 1:
            assert not recurrent_policy, \
                "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0], hidden_size, args)
        elif obs_rank == 3:
            self.base = CNNBase(obs_shape[0], recurrent_policy)
        else:
            raise NotImplementedError

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        else:
            raise NotImplementedError

        self.state_size = self.base.state_size
        self.leaky = args.leaky
        self.scale = 1.
Exemple #6
0
    def __init__(self, num_inputs, action_space, num_heads=1, hidden_size=512):
        """Build ``num_heads`` representation/critic pairs and one dist head.

        Args:
            num_inputs: forwarded to ``build_representation``.
            action_space: "Discrete" or "Box" (matched by class name).
            num_heads: number of representation/critic pairs.
            hidden_size: feature width shared by all sub-networks.

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(CNNPolicy, self).__init__()
        self.num_heads = num_heads

        # Plain lists first; they are wrapped in ModuleLists after the dist
        # head so the dist head is registered before them.
        self.representations, self.critics = [], []
        for _head in range(num_heads):
            representation = self.build_representation(
                num_inputs, hidden_size=hidden_size)
            self.representations.append(representation)
            self.critics.append(self.build_critic(hidden_size, 1))

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(hidden_size, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(hidden_size, action_space.n)
        else:
            raise NotImplementedError

        self.critics = nn.ModuleList(self.critics)
        self.representations = nn.ModuleList(self.representations)

        # Single parameter group holding every parameter of the module.
        self.param_groups = [list(self.parameters())]

        self.train()
        self.reset_parameters()
Exemple #7
0
def main():
    """Solve a small MouselabEnv and evaluate its value function once."""
    from meta import MouselabEnv
    from distributions import Categorical

    reward_dist = Categorical([0, 1])
    env = MouselabEnv(2, 2, reward=reward_dist)

    # solve() yields four items; only the value function is used here.
    _q, value_fn, _policy, _info = solve(env)
    value_fn(env._state)
Exemple #8
0
    def actor(self, states, name='actor', reuse=False, trainable=True):
        """Build the actor head and return its action distribution.

        Args:
            states: input passed to ``self.actor_net``.
            name: variable scope name.
            reuse: forwarded to ``tf.variable_scope``.
            trainable: forwarded to the feature network and the Dense layers.

        Returns:
            ``Categorical(logits)`` when ``self.act_space`` is a
            ``gym.spaces.Discrete``, otherwise ``Normal(mean, logstd)``.
        """
        with tf.variable_scope(name, reuse=reuse):
            hidden = self.actor_net(states,
                                    self.drop_rate,
                                    trainable=trainable)

            if not isinstance(self.act_space, gym.spaces.Discrete):
                mean = Dense(self.act_space.shape[0],
                             None,
                             trainable=trainable,
                             name='mean')(hidden)
                # The log std-dev is a free variable initialised to -0.5 per
                # action dimension; it does not depend on the state.
                initial_logstd = -0.5 * np.ones(self.act_space.shape[0],
                                                dtype=np.float32)
                logstd = tf.get_variable('logstd', initializer=initial_logstd)
                return Normal(mean=mean, logstd=logstd)

            logits = Dense(self.act_space.n,
                           None,
                           trainable=trainable,
                           name="layer_logits")(hidden)
            return Categorical(logits)
Exemple #9
0
    def __init__(self, obs_shape, action_space, one_hot, hid_size, recurrent_policy, label):
        """Pick a base network from the observation rank and attach a dist head.

        Args:
            obs_shape: observation shape; length 3 selects ``CNNBase``,
                length 1 selects ``MLPBase``.
            action_space: "Discrete" or "Box" (matched by class name).
            one_hot: forwarded to the base network.
            hid_size: stored and forwarded to the base network.
            recurrent_policy: forwarded to the CNN base; must be falsy for
                the MLP base.
            label: stored on the instance.

        Raises:
            NotImplementedError: for any other observation rank or action
                space class.
        """
        super(EHRL_Policy, self).__init__()

        self.hid_size = hid_size
        self.label = label

        obs_rank = len(obs_shape)
        if obs_rank == 1:
            assert not recurrent_policy, \
                "Recurrent policy is not implemented for the MLP controller"
            self.base = MLPBase(obs_shape[0], one_hot, self.hid_size)
        elif obs_rank == 3:
            self.base = CNNBase(obs_shape[0], one_hot, self.hid_size,
                                recurrent_policy)
        else:
            raise NotImplementedError

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(self.base.output_size, action_space.n)
        else:
            raise NotImplementedError

        self.state_size = self.base.state_size
Exemple #10
0
    def __init__(self, num_inputs, action_space, use_rp=False, num_heads=1):
        """Build the conv policy and, optionally, a parallel "extra" network.

        Args:
            num_inputs: number of input channels of the conv stacks.
            action_space: must expose ``n`` (number of Categorical outputs).
            use_rp: when true, a second conv/linear stack with a one-output
                critic is created and parameters are split into two groups.
            num_heads: number of outputs of the main critic.
        """
        super(CNNPolicy, self).__init__()
        self.use_rp = use_rp

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
        self.linear1 = nn.Linear(32 * 7 * 7, 512)
        self.critic_linear = nn.Linear(512, num_heads)
        self.dist = Categorical(512, action_space.n)

        if not use_rp:
            self.param_groups = [list(self.parameters())]
        else:
            self.extra_conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
            self.extra_conv2 = nn.Conv2d(32, 64, 4, stride=2)
            self.extra_conv3 = nn.Conv2d(64, 32, 3, stride=1)
            self.extra_hidden = nn.Linear(32 * 7 * 7, 512)
            self.extra_critics = nn.Linear(512, 1)

            extra_layers = (self.extra_conv1, self.extra_conv2,
                            self.extra_conv3, self.extra_hidden,
                            self.extra_critics)
            n_extra = sum(len(list(layer.parameters()))
                          for layer in extra_layers)

            # The extra layers were registered last, so their parameters are
            # the tail of parameters(); group 0 is that tail, group 1 the rest.
            every_param = list(self.parameters())
            self.param_groups = [every_param[-n_extra:],
                                 every_param[:-n_extra]]

        self.train()
        self.reset_parameters()
Exemple #11
0
    def __init__(self, num_inputs, num_actions, use_gru, input_shape):
        """Build a conv head sized by a probe pass, plus critic and dist heads.

        Args:
            num_inputs: number of input channels of the first conv layer.
            num_actions: number of Categorical outputs.
            use_gru: when true, a 512x512 ``nn.GRUCell`` is added.
            input_shape: tuple appended to ``(1,)`` to build the probe input.
        """
        super(CNNPolicy, self).__init__()
        self.h = None

        conv_layers = [
            nn.Conv2d(num_inputs, 32, 8, stride=4),
            nn.ReLU(True),
            nn.Conv2d(32, 64, 4, stride=2),
            nn.ReLU(True),
            nn.Conv2d(64, 32, 3, stride=1),
            nn.ReLU(),
        ]
        self.conv_head = nn.Sequential(*conv_layers)

        # Run one random sample through the conv head to learn how many
        # features it produces for this input shape.
        probe = torch.autograd.Variable(torch.randn((1, ) + input_shape))
        self.conv_out_size = self.conv_head(probe).nelement()

        self.hidden_size = 512
        self.linear1 = nn.Linear(self.conv_out_size, self.hidden_size)

        if use_gru:
            self.gru = nn.GRUCell(512, 512)

        self.critic_linear = nn.Linear(512, 1)
        self.dist = Categorical(512, num_actions)

        # This variant starts in evaluation mode.
        self.eval()
        self.reset_parameters()
Exemple #12
0
    def __init__(self, num_inputs, input_shape, params):
        """Build a conv head sized by a probe pass, plus critic and dist heads.

        Args:
            num_inputs: number of input channels of the first conv layer.
            input_shape: tuple appended to ``(1,)`` to build the probe input.
            params: configuration object read for ``conv1_size``,
                ``conv2_size``, ``conv3_size``, ``hidden_size``,
                ``recurrent_policy`` and ``num_actions``; stored as
                ``self.params``.
        """
        super(CNNPolicy, self).__init__()

        self.conv_head = nn.Sequential(
            nn.Conv2d(num_inputs, params.conv1_size, 8, stride=4),
            nn.ReLU(True),
            nn.Conv2d(params.conv1_size, params.conv2_size, 4, stride=2),
            nn.ReLU(True),
            nn.Conv2d(params.conv2_size, params.conv3_size, 3, stride=1),
            nn.ReLU(True))

        # Probe the conv head once with a random sample and reuse the result
        # for both the diagnostic print and the flattened-size computation
        # (the head used to be evaluated three times on the same input).
        conv_input = torch.randn((1, ) + input_shape)
        conv_output = self.conv_head(conv_input)
        print(conv_input.size(), conv_output.size(), conv_output.size())
        self.conv_out_size = conv_output.nelement()
        self.hidden_size = params.hidden_size

        self.linear1 = nn.Linear(self.conv_out_size, self.hidden_size)

        if params.recurrent_policy:
            # TODO: check speedup with masked GRU optimization
            # (MaskedGRU(self.hidden_size, self.hidden_size)).
            self.gru = nn.GRUCell(self.hidden_size, self.hidden_size)

        self.critic_linear = nn.Linear(self.hidden_size, 1)
        self.dist = Categorical(self.hidden_size, params.num_actions)

        self.params = params
        self.train()
        self.reset_parameters()
Exemple #13
0
    def __init__(self, num_inputs, action_space, n_contexts):
        """Build a small context-conditioned conv policy.

        Args:
            num_inputs: number of input channels (stacked frames).
            action_space: must expose ``n`` (number of actions).
            n_contexts: currently ignored, see the note in the body.
        """
        super(CNNPolicy, self).__init__()

        n_actions = action_space.n
        self.dist = Categorical()

        self.num_inputs = num_inputs    # number of stacked frames
        self.num_outputs = n_actions    # action size

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        feature_dim = 10
        self.linear1 = nn.Linear(32 * 7 * 7, feature_dim)

        # NOTE(review): the caller-supplied n_contexts is overwritten with 2
        # here, so the argument has no effect. Confirm whether this is an
        # intentional experiment setting before relying on the parameter.
        n_contexts = 2
        head_in = feature_dim + n_contexts

        self.action_linear = nn.Linear(head_in, 4)
        self.action_linear2 = nn.Linear(4, n_actions)

        self.critic_linear = nn.Linear(head_in, 1)
Exemple #14
0
    def __init__(self, num_inputs, action_space):
        """Build a four-layer LeakyReLU MLP with value, policy and dist heads.

        Args:
            num_inputs: size of the input vector.
            action_space: "Discrete" or "Box" (matched by class name); stored
                on the instance.

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(BPW_MLPPolicy, self).__init__()

        self.action_space = action_space
        slope = 0.1  # negative slope shared by every LeakyReLU below

        # Trunk: num_inputs -> 256 -> 256 -> 128 -> 128.
        self.fc1 = nn.Linear(num_inputs, 256)
        self.lrelu1 = nn.LeakyReLU(slope)
        self.fc2 = nn.Linear(256, 256)
        self.lrelu2 = nn.LeakyReLU(slope)
        self.fc3 = nn.Linear(256, 128)
        self.lrelu3 = nn.LeakyReLU(slope)
        self.fc4 = nn.Linear(128, 128)
        self.lrelu4 = nn.LeakyReLU(slope)

        # Heads on top of the 128-wide trunk output.
        self.value = nn.Linear(128, 1)
        self.policy = nn.Linear(128, 64)
        self.lrelu_policy = nn.LeakyReLU(slope)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(64, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(64, action_space.n)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
Exemple #15
0
    def __init__(self,
                 components,
                 alpha_0=None,
                 a_0=None,
                 b_0=None,
                 weights=None,
                 weights_obj=None):
        """Store the components and create (or adopt) the weights object.

        Exactly one way of specifying the weights must be given:
        ``alpha_0``, the pair ``(a_0, b_0)``, or a ready-made ``weights_obj``.

        Args:
            components: non-empty sequence of component distributions.
            alpha_0: if given, weights are a ``Categorical`` built with it.
            a_0, b_0: if both given, weights are a
                ``CategoricalAndConcentration`` built with them.
            weights: optional initial weights forwarded to whichever weights
                class is constructed (unused with ``weights_obj``).
            weights_obj: if given, used directly as the weights object.

        Raises:
            AssertionError: if ``components`` is empty or the number of
                weight specifications is not exactly one.
        """
        assert len(components) > 0

        # A chained XOR of three flags is also true when all three are set,
        # so count the supplied specifications explicitly instead.
        n_specs = sum((alpha_0 is not None,
                       a_0 is not None and b_0 is not None,
                       weights_obj is not None))
        assert n_specs == 1, \
            "specify exactly one of alpha_0, (a_0, b_0) or weights_obj"

        self.components = components

        if alpha_0 is not None:
            self.weights = Categorical(alpha_0=alpha_0,
                                       K=len(components),
                                       weights=weights)
        elif weights_obj is not None:
            self.weights = weights_obj
        else:
            self.weights = CategoricalAndConcentration(a_0=a_0,
                                                       b_0=b_0,
                                                       K=len(components),
                                                       weights=weights)

        self.labels_list = []
    def __init__(self, num_inputs, action_space):
        """Build separate actor and value MLPs plus the action distribution.

        Args:
            num_inputs: size of the input vector.
            action_space: "Discrete" or "Box" (matched by class name); stored
                on the instance.

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(MLPPolicy, self).__init__()

        self.action_space = action_space
        self.input_norm = WelfordNormalization(num_inputs)

        width = 64  # hidden width of both the actor and the value network

        # Actor: num_inputs -> 64 -> 64.
        self.a_fc1 = nn.Linear(num_inputs, width)
        self.a_fc2 = nn.Linear(width, width)

        # Value: num_inputs -> 64 -> 64 -> 1.
        self.v_fc1 = nn.Linear(num_inputs, width)
        self.v_fc2 = nn.Linear(width, width)
        self.v_fc3 = nn.Linear(width, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(width, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(width, action_space.n)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
Exemple #17
0
    def __init__(self, num_inputs, action_space, use_gru):
        """Build a four-conv policy with critic and dist heads.

        Args:
            num_inputs: number of input channels of the first conv layer.
            action_space: "Discrete" or "Box" (matched by class name).
            use_gru: when true, an ``nn.GRUCell`` is added.

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(CNNPolicy, self).__init__()

        feature_dim = 256  # width of linear1 output, critic input, dist input

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=2)
        self.conv2 = nn.Conv2d(32, 32, 4, stride=2)
        self.conv3 = nn.Conv2d(32, 32, 4, stride=2)
        self.conv4 = nn.Conv2d(32, 32, 4, stride=1)

        self.linear1 = nn.Linear(32 * 2 * 2, feature_dim)

        if use_gru:
            # NOTE(review): the GRU is 512 wide while every other layer here
            # works on 256 features. Confirm against the forward pass whether
            # this should be GRUCell(256, 256).
            self.gru = nn.GRUCell(512, 512)

        self.critic_linear = nn.Linear(feature_dim, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(feature_dim, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(feature_dim, action_space.n)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
Exemple #18
0
    def __init__(self, num_inputs, action_space):
        """Build a conv trunk with two-layer critic and a 200-wide actor layer.

        Args:
            num_inputs: number of input channels of the first conv layer.
            action_space: "Discrete" or "Box" (matched by class name).

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(CNNPolicy2, self).__init__()

        trunk_dim, head_dim = 512, 200

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, trunk_dim)

        # Critic: 512 -> 200 -> 1.
        self.critic_linear1 = nn.Linear(trunk_dim, head_dim)
        self.critic_linear2 = nn.Linear(head_dim, 1)

        # Actor: 512 -> 200, then the distribution head.
        self.actor_linear1 = nn.Linear(trunk_dim, head_dim)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(head_dim, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(head_dim, action_space.n)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
Exemple #19
0
    def __init__(self, num_inputs, action_space, use_gru):
        """Build a policy from three 2x2 conv layers with critic and dist heads.

        Args:
            num_inputs: number of input channels of the first conv layer.
            action_space: "Discrete" or "Box" (matched by class name).
            use_gru: when true, a 512x512 ``nn.GRUCell`` is added.

        Raises:
            NotImplementedError: for any other action space class.
        """
        super(CNNPolicy, self).__init__()

        feature_dim = 512

        self.conv1 = nn.Conv2d(num_inputs, 32, 2, stride=1)
        self.conv2 = nn.Conv2d(32, 32, 2, stride=1)
        self.conv3 = nn.Conv2d(32, 32, 2, stride=1)

        self.linear1 = nn.Linear(32 * 4 * 4, feature_dim)

        if use_gru:
            self.gru = nn.GRUCell(feature_dim, feature_dim)

        self.critic_linear = nn.Linear(feature_dim, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            self.dist = DiagGaussian(feature_dim, action_space.shape[0])
        elif space_kind == "Discrete":
            self.dist = Categorical(feature_dim, action_space.n)
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()
Exemple #20
0
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        """Instantiate the base network and the matching action distribution.

        Args:
            obs_shape: observation shape; when ``base`` is None, length 3
                selects ``CNNBase`` and length 1 selects ``MLPBase``.
            action_space: "Discrete", "Box" or "MultiBinary" (matched by
                class name).
            base: optional callable building the base network.
            base_kwargs: optional keyword arguments for ``base``.

        Raises:
            NotImplementedError: for an unsupported observation rank (when
                ``base`` is None) or action space class.
        """
        super(Policy, self).__init__()

        kwargs = {} if base_kwargs is None else base_kwargs

        if base is None:
            obs_rank = len(obs_shape)
            if obs_rank == 1:
                base = MLPBase
            elif obs_rank == 3:
                base = CNNBase
            else:
                raise NotImplementedError

        self.base = base(obs_shape[0], **kwargs)

        space_kind = action_space.__class__.__name__
        if space_kind == "MultiBinary":
            self.dist = Bernoulli(self.base.output_size,
                                  action_space.shape[0])
        elif space_kind == "Box":
            self.dist = DiagGaussian(self.base.output_size,
                                     action_space.shape[0])
        elif space_kind == "Discrete":
            # NOTE(review): unlike the other branches, the input width is a
            # fixed 512 rather than self.base.output_size. Confirm that this
            # is intended.
            self.dist = Categorical(512, action_space.n)
        else:
            raise NotImplementedError
Exemple #21
0
    def __init__(self, num_inputs, num_actions, use_gru, input_shape):
        """Build a conv head sized by a probe pass, a depth head and the heads.

        Args:
            num_inputs: number of input channels of the first conv layer.
            num_actions: number of Categorical outputs.
            use_gru: when true, a 512x512 ``nn.GRUCell`` is added.
            input_shape: tuple appended to ``(1,)`` to build the probe input.
        """
        super(CNNDepthPolicy, self).__init__()

        self.conv_head = nn.Sequential(nn.Conv2d(num_inputs, 32, 8, stride=4),
                                       nn.ReLU(True),
                                       nn.Conv2d(32, 64, 4, stride=2),
                                       nn.ReLU(True),
                                       nn.Conv2d(64, 32, 3, stride=1),
                                       nn.ReLU())

        # 1x1 convolution mapping the 32 conv channels to 8 channels.
        self.depth_head = nn.Conv2d(32, 8, 1, 1)

        # Probe the conv head once and reuse the output for both the
        # diagnostic print and the size computation (it used to be run twice).
        conv_input = torch.autograd.Variable(torch.randn((1, ) + input_shape))
        conv_output = self.conv_head(conv_input)
        print(conv_input.size(), conv_output.size())
        self.conv_out_size = conv_output.nelement()
        self.linear1 = nn.Linear(self.conv_out_size, 512)

        if use_gru:
            self.gru = nn.GRUCell(512, 512)

        self.critic_linear = nn.Linear(512, 1)
        self.dist = Categorical(512, num_actions)

        self.train()
        self.reset_parameters()
Exemple #22
0
 def test_kl_sym():
     """Check dist1.kl against the summed KLs of Categorical(5) and Categorical(3)."""
     # Same call order as before (old 5, old 3, new 5, new 3) so anything
     # random inside random_softmax is consumed identically.
     p_old_0 = np.array([random_softmax(5)])
     p_old_1 = np.array([random_softmax(3)])
     p_new_0 = np.array([random_softmax(5)])
     p_new_1 = np.array([random_softmax(3)])

     old_info = {'id_0_prob': tf.constant(p_old_0),
                 'id_1_prob': tf.constant(p_old_1)}
     new_info = {'id_0_prob': tf.constant(p_new_0),
                 'id_1_prob': tf.constant(p_new_1)}

     joint_kl = dist1.kl(old_info, new_info).eval(session=sess)
     kl_0 = Categorical(5).kl({'prob': p_old_0},
                              {'prob': p_new_0}).eval(session=sess)
     kl_1 = Categorical(3).kl({'prob': p_old_1},
                              {'prob': p_new_1}).eval(session=sess)

     np.testing.assert_allclose(joint_kl, kl_0 + kl_1)
Exemple #23
0
    def __init__(self,
                 num_inputs,
                 action_space,
                 num_heads=1,
                 reward_predictor=False,
                 use_s=True,
                 use_s_a=False,
                 use_s_a_sprime=False):
        """Build the actor, a multi-head critic and an optional reward predictor.

        Args:
            num_inputs: size of the input vector.
            action_space: "Discrete" or "Box" (matched by class name); stored
                on the instance.
            num_heads: number of critic outputs.
            reward_predictor: when true, an extra network ``self.rp`` is built.
            use_s, use_s_a, use_s_a_sprime: at most one may be set; they pick
                the input size of the reward predictor.

        Raises:
            AssertionError: if more than one of the ``use_*`` flags is set.
            NotImplementedError: for any other action space class.
        """
        assert use_s + use_s_a + use_s_a_sprime <= 1
        super(MLPPolicy, self).__init__()

        self.use_s = use_s
        self.use_s_a = use_s_a
        self.use_s_a_sprime = use_s_a_sprime
        self.num_heads = num_heads
        self.action_space = action_space

        # Actor: num_inputs -> 64 -> 64.
        self.a_fc1 = nn.Linear(num_inputs, 64)
        self.a_fc2 = nn.Linear(64, 64)

        space_kind = action_space.__class__.__name__
        if space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        elif space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        else:
            raise NotImplementedError

        # Group 0 is captured before any critic exists, so it holds only the
        # actor layers and the distribution head.
        self.critics = []
        self.param_groups = [list(self.parameters())]

        self.critics.append(self.build_critic(num_inputs,
                                              num_outputs=num_heads,
                                              hidden_size=64))
        self.critics = nn.ModuleList(self.critics)

        # One further parameter group per critic.
        self.param_groups.extend(
            list(critic.parameters()) for critic in list(self.critics))

        if reward_predictor:
            if self.use_s:
                rp_input_size = num_inputs
            elif self.use_s_a:
                rp_input_size = num_inputs + num_outputs
            else:
                rp_input_size = num_inputs * 2 + num_outputs

            self.rp = self.build_critic(rp_input_size,
                                        num_outputs=1,
                                        hidden_size=64)
            self.param_groups.append(list(self.rp.parameters()))

        self.train()
        self.reset_parameters()
 def __init__(self, num_inputs, num_outputs, action_space, use_gru):
     """Build the distribution head, one conv layer and the linear layers.

     Args:
         num_inputs: number of input channels of the conv layer.
         num_outputs: number of Categorical outputs; when None the size and
             the distribution type are derived from ``action_space``.
         action_space: "Discrete" or "Box" (matched by class name); only
             consulted when ``num_outputs`` is None.
         use_gru: accepted but not read by this constructor.

     Raises:
         NotImplementedError: when ``num_outputs`` is None and the action
             space class is neither "Discrete" nor "Box".
     """
     super(OptionPolicy, self).__init__()
     # Identity test: `== None` would invoke a custom __eq__ on the argument.
     if num_outputs is None:
         if action_space.__class__.__name__ == "Discrete":
             num_outputs = action_space.n
             self.dist = Categorical(512, num_outputs)
         elif action_space.__class__.__name__ == "Box":
             num_outputs = action_space.shape[0]
             self.dist = DiagGaussian(512, num_outputs)
         else:
             raise NotImplementedError
     else:
         self.dist = Categorical(512, num_outputs)
     self.conv1 = nn.Conv2d(num_inputs, 16, 3, stride=1, padding=1)
     self.linear1 = nn.Linear(400, 512)
     self.linear_critic = nn.Linear(512, 1)
     self.train()
     self.reset_parameters()
Exemple #25
0
 def build_dist(self, action_space):
     """Return the distribution head matching ``action_space``.

     A ``Discrete`` space yields a ``Categorical`` with ``action_space.n``
     outputs and a ``Box`` space yields a ``DiagGaussian`` with
     ``action_space.shape[0]`` outputs; both take
     ``self.recurrent_module.output_size`` as input width.

     Raises:
         NotImplementedError: for any other kind of space.
     """
     if isinstance(action_space, Discrete):
         return Categorical(self.recurrent_module.output_size,
                            action_space.n)
     if isinstance(action_space, Box):
         return DiagGaussian(self.recurrent_module.output_size,
                             action_space.shape[0])
     raise NotImplementedError
Exemple #26
0
    def log_likelihood(self,x, K_extra=1):
        """
        Estimate the log likelihood of ``x`` with samples from the model.

        ``K_extra`` components that are not populated by the current model
        are drawn from the prior, giving a truncated approximate mixture
        model. Requires exactly one entry in ``self.labels_list``.

        Returns the sum over rows of ``x`` of the log mixture likelihood.
        """
        x = np.asarray(x)
        n_occupied = len(self._get_occupied())
        K_total = n_occupied + K_extra

        def _resampled_copy(label=None):
            # Fresh copy of the observation distribution, resampled either
            # from the data carrying `label` or, without a label, from the
            # prior.
            distn = copy.deepcopy(self.obs_distn)
            if label is None:
                distn.resample()
            else:
                distn.resample(data=self._get_data_withlabel(label))
            return distn

        # Occupied components first, then the extra ones from the prior.
        obs_distns = [_resampled_copy(label) for label in range(n_occupied)]
        obs_distns += [_resampled_copy() for _ in range(K_extra)]

        # Sample a set of weights given the current labels.
        weights = Categorical(alpha_0=self.alpha_0,
                              K=K_total,
                              weights=None)
        assert len(self.labels_list) == 1
        weights.resample(data=self.labels_list[0].z)

        # One column of per-datum log likelihoods per component, shifted by
        # the log weight of that component.
        vals = np.empty((x.shape[0],K_total))
        for col, distn in zip(range(K_total), obs_distns):
            vals[:,col] = distn.log_likelihood(x)
        vals += weights.log_likelihood(np.arange(K_total))

        assert not np.isnan(vals).any()
        return np.logaddexp.reduce(vals,axis=1).sum()
    def __init__(self, num_action, input_shape=(120, 160, 3), batch_size=64, training=True, model_path=None, k=4,
                 clip=0.2, use_clipped=True, entropy_coef=0.01, max_grad_norm=0.5, value_loss_coef=0.5, *args, **kwargs):
        """Create the networks, optimizers, memory and PPO settings.

        Args:
            num_action: accepted but not read by this constructor.
            input_shape: image shape given to ``Memory``.
            batch_size: stored and given to ``Memory``.
            training: accepted but not read by this constructor.
            model_path: stored on the instance.
            k, clip, use_clipped, entropy_coef, max_grad_norm,
                value_loss_coef: PPO settings stored on the instance.
            *args, **kwargs: forwarded to the parent constructor.
        """
        super(PPOAgent, self).__init__(*args, **kwargs)

        # Discrete control values, one Categorical head per list below.
        self.steer = [-0.3, -0.15, 0, 0.15, 0.3]
        self.throttle = [0, 0.2, 0.4, 0.6, 0.8]

        # Networks; the target network starts as a copy of the online one.
        self.perception = ImpalaPerception()
        self.actor_critic = ImpalaActorCritic(1216, 128)
        self.actor_critic_target = ImpalaActorCritic(1216, 128)
        online_state = self.actor_critic.state_dict()
        self.actor_critic_target.load_state_dict(online_state)
        self.tau = 1e-3

        # Optimizers: one for the actor-critic, one for the perception net.
        self.lr = 0.001
        self.actor_critic_optim = optim.Adam(
            self.actor_critic.parameters(), lr=self.lr)
        self.perc_optim = optim.Adam(
            self.perception.parameters(), lr=self.lr)

        # Common settings.
        self.gamma = 0.99
        self.memory = Memory(batch_size=batch_size, img_shape=input_shape)
        self.model_path = model_path
        self.n = 1
        self.train_step = 0
        self.r_sum = 0
        self.last_state = None
        self.last_actions = None
        self.batch_size = batch_size

        # PPO settings.
        self.k = k
        self.clip = clip
        self.entropy_coef = entropy_coef
        self.use_clipped = use_clipped
        self.max_grad_norm = max_grad_norm
        self.value_loss_coef = value_loss_coef

        head_in = self.actor_critic.num_hidden
        self.dist1 = Categorical(head_in, len(self.steer))
        self.dist2 = Categorical(self.actor_critic.num_hidden,
                                 len(self.throttle))
Exemple #28
0
    def log_likelihood(self,x, K_extra=1):
        """
        Estimate the log likelihood of ``x`` with samples from the model.

        ``K_extra`` components that are not populated by the current model
        are drawn from the prior, giving a truncated approximate mixture
        model. Requires exactly one entry in ``self.labels_list``.

        Returns the sum over rows of ``x`` of the log mixture likelihood.
        """
        x = np.asarray(x)
        n_occupied = len(self._get_occupied())
        K_total = n_occupied + K_extra

        obs_distns = []

        def _add_resampled(**resample_kwargs):
            # Append a fresh copy of the observation distribution; the data
            # (if any) is looked up only after the copy has been made.
            distn = copy.deepcopy(self.obs_distn)
            distn.resample(**{name: fetch()
                              for name, fetch in resample_kwargs.items()})
            obs_distns.append(distn)

        # Occupied components, resampled from the data carrying their label.
        for label in range(n_occupied):
            _add_resampled(data=lambda: self._get_data_withlabel(label))

        # Extra components, resampled from the prior.
        for _ in range(K_extra):
            _add_resampled()

        # Sample a set of weights given the current labels.
        weights = Categorical(alpha_0=self.alpha_0,
                              K=K_total,
                              weights=None)
        assert len(self.labels_list) == 1
        weights.resample(data=self.labels_list[0].z)

        # One column of per-datum log likelihoods per component, shifted by
        # the log weight of that component.
        vals = np.empty((x.shape[0],K_total))
        for col in range(K_total):
            vals[:,col] = obs_distns[col].log_likelihood(x)
        vals += weights.log_likelihood(np.arange(K_total))

        assert not np.isnan(vals).any()
        return logsumexp(vals,axis=1).sum()
Exemple #29
0
    def __init__(self, num_inputs, action_space):
        """Build three conv layers, one linear layer, a critic and a dist head.

        Args:
            num_inputs: number of input channels of the first conv layer.
            action_space: must expose ``n`` (number of Categorical outputs).
        """
        super(CNNPolicy, self).__init__()

        feature_dim = 512

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
        self.linear1 = nn.Linear(32 * 7 * 7, feature_dim)

        # Heads on top of the 512-wide features.
        self.critic_linear = nn.Linear(feature_dim, 1)
        self.dist = Categorical(feature_dim, action_space.n)

        self.train()
        self.reset_parameters()
Exemple #30
0
    def __init__(self, obs_space, action_space, base=None, base_kwargs=None):
        """Build an ``MLPBase`` over the observation and a Categorical head.

        Args:
            obs_space: must expose ``shape``; its first entry sizes the base.
            action_space: must expose ``n`` (number of Categorical outputs).
            base: accepted but not read; ``MLPBase`` is always used.
            base_kwargs: optional keyword arguments for ``MLPBase``.
        """
        super(Policy, self).__init__()

        obs_dim = obs_space.shape[0]
        kwargs = {} if base_kwargs is None else base_kwargs

        self.base = MLPBase(obs_dim, **kwargs)
        self.dist = Categorical(self.base.output_size, action_space.n)
Exemple #31
0
 def __init__(self, min_tactus, max_tactus):
     """Build one Categorical over the intervals for every conditioning interval.

     For each ``first`` in ``range(min_tactus, max_tactus)`` the unnormalised
     weight of ``second`` is ``exp(-(0.5 * |first - second|) ** 2)``. The
     weights are normalised to sum to one and all but the last are handed
     to ``Categorical``.

     Args:
         min_tactus: first interval (inclusive).
         max_tactus: end of the interval range (exclusive).
     """
     intervals = list(range(min_tactus, max_tactus))
     conditionals = intervals
     distributions = []
     for first in intervals:
         params = [np.exp(-(.5 * abs(first - second)) ** 2)
                   for second in intervals]
         # Sum once per row instead of once per element.
         total = sum(params)
         normalised = [p / total for p in params]
         # The last probability is omitted from the parameters.
         distributions.append(Categorical(intervals, normalised[:-1]))
     super().__init__(conditionals, distributions)
    def __init__(self, num_inputs, action_size):
        """Build the conv layers, the critic head and a Categorical head.

        Args:
            num_inputs: number of input channels; the value 6 selects a
                larger flattened size for ``linear1``.
            action_size: number of Categorical outputs.
        """
        super(CNNPolicy, self).__init__()

        feature_dim = 512

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.act_func = F.leaky_relu

        # Flattened conv output size: a fixed 11264 for 6 input channels,
        # 32*7*7 otherwise.
        self.intermediate_size = 11264 if num_inputs == 6 else 32*7*7

        self.linear1 = nn.Linear(self.intermediate_size, feature_dim)
        self.critic_linear = nn.Linear(feature_dim, 1)
        self.dist = Categorical(feature_dim, action_size)

        self.train()
        self.reset_parameters()
class CNNPolicy(nn.Module):
    """Actor-critic network with a shared convolutional encoder.

    The encoder is three convolutions followed by one linear layer producing
    512 features. A linear critic head and a ``Categorical`` action head both
    consume those features after the shared activation function.
    """

    def __init__(self, num_inputs, action_size):
        """Create the convolutional encoder, the value head and the action head.

        Args:
            num_inputs: Number of input channels of the first convolution.
            action_size: Number of outputs handed to ``Categorical``.
        """
        super(CNNPolicy, self).__init__()

        # Three-layer convolutional stack (no batch norm).
        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)

        # Non-linearity shared by the encoder and both heads.
        self.act_func = F.leaky_relu

        # Size of the flattened conv3 output. The 6-channel case uses a
        # different hard-coded size -- presumably because those inputs have
        # a different spatial resolution; TODO confirm.
        self.intermediate_size = 11264 if num_inputs == 6 else 32 * 7 * 7

        self.linear1 = nn.Linear(self.intermediate_size, 512)

        # Value head: one scalar per sample.
        self.critic_linear = nn.Linear(512, 1)

        # Action head: always a Categorical over ``action_size`` outputs.
        self.dist = Categorical(512, action_size)

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights, then rescale the encoder weights."""
        self.apply(weights_init)

        # The gain is looked up for 'relu' although act_func is leaky ReLU.
        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        # Only relevant if the action head is ever a DiagGaussian.
        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Map inputs to the 512 shared features (no activation on the output).

        The inputs go into ``conv1`` as given; no rescaling is applied here.
        """
        hidden = inputs
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.act_func(conv(hidden))

        flat = hidden.view(-1, self.intermediate_size)
        return self.linear1(flat)

    def predict_for_action(self, inputs):
        """Return the activated features that feed the action distribution."""
        return self.act_func(inputs)

    def predict_for_value(self, inputs):
        """Return the critic output for already-encoded features."""
        activated = self.act_func(inputs)
        return self.critic_linear(activated)

    def forward(self, inputs):
        """Return ``(for_value, for_action)`` for a batch of raw inputs."""
        encoded = self.encode(inputs)
        for_action = self.predict_for_action(encoded)
        for_value = self.predict_for_value(encoded)
        return for_value, for_action

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` evaluated on the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_probs(self.predict_for_action(encoded))

    def action_logdist(self, inputs):
        """Return ``self.dist.action_logprobs`` evaluated on the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_logprobs(self.predict_for_action(encoded))

    def act(self, inputs, deterministic=False):
        """Run the network and draw an action via ``self.dist.sample2``.

        Args:
            inputs: Batch of raw inputs for ``forward``.
            deterministic: Forwarded unchanged to ``self.dist.sample2``.

        Returns:
            Tuple ``(value, action, action_log_probs, dist_entropy)``.
        """
        value, action_features = self(inputs)
        action, action_log_probs, dist_entropy = self.dist.sample2(
            action_features, deterministic=deterministic)
        return value, action, action_log_probs, dist_entropy
class CNNPolicy(nn.Module):
    """Actor-critic network whose action head depends on the action space.

    Three convolutions and a linear layer produce 512 shared features. A
    linear critic head and a distribution head (``Categorical`` or
    ``DiagGaussian``) both consume them after the shared activation function.
    """

    def __init__(self, num_inputs, action_space):
        """Create the encoder, the value head and the action distribution.

        Args:
            num_inputs: Number of input channels of the first convolution.
            action_space: Object whose class name selects the head:
                ``"Discrete"`` uses ``action_space.n`` with ``Categorical``,
                ``"Box"`` uses ``action_space.shape[0]`` with
                ``DiagGaussian``. Anything else is passed to ``Categorical``
                directly as the number of outputs.
        """
        super(CNNPolicy, self).__init__()

        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)

        # Non-linearity shared by the encoder and both heads.
        self.act_func = F.leaky_relu

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        # Value head: one scalar per sample.
        self.critic_linear = nn.Linear(512, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(512, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(512, action_space.shape[0])
        else:
            # Fallback instead of raising: action_space itself is used as
            # the output count.
            self.dist = Categorical(512, action_space)

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights, then rescale the encoder weights."""
        self.apply(weights_init)

        # The gain is looked up for 'relu' although act_func is leaky ReLU.
        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Map inputs to the 512 shared features (no activation on the output).

        The inputs go into ``conv1`` as given; no rescaling is applied here.
        """
        hidden = inputs
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.act_func(conv(hidden))

        # Flatten to the fixed 32 * 7 * 7 size that linear1 expects.
        flat = hidden.view(-1, 32 * 7 * 7)
        return self.linear1(flat)

    def predict_for_action(self, inputs):
        """Return the activated features that feed the action distribution."""
        return self.act_func(inputs)

    def predict_for_value(self, inputs):
        """Return the critic output for already-encoded features."""
        activated = self.act_func(inputs)
        return self.critic_linear(activated)

    def forward(self, inputs):
        """Return ``(for_value, for_action)`` for a batch of raw inputs."""
        encoded = self.encode(inputs)
        for_action = self.predict_for_action(encoded)
        for_value = self.predict_for_value(encoded)
        return for_value, for_action

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` evaluated on the action features."""
        # NOTE(review): earlier debugging notes here recorded non-zero
        # gradients of log(dist) w.r.t. linear1/conv3 weights but zero
        # gradients w.r.t. conv1/conv2 weights -- unverified, confirm.
        encoded = self.encode(inputs)
        return self.dist.action_probs(self.predict_for_action(encoded))

    def action_logdist(self, inputs):
        """Return ``self.dist.action_logprobs`` evaluated on the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_logprobs(self.predict_for_action(encoded))

    def act(self, inputs, deterministic=False):
        """Run the network and draw an action via ``self.dist.sample2``.

        Args:
            inputs: Batch of raw inputs for ``forward``.
            deterministic: Forwarded unchanged to ``self.dist.sample2``.

        Returns:
            Tuple ``(value, action, action_log_probs, dist_entropy)``.
        """
        value, action_features = self(inputs)
        action, action_log_probs, dist_entropy = self.dist.sample2(
            action_features, deterministic=deterministic)
        return value, action, action_log_probs, dist_entropy