Ejemplo n.º 1
0
    def __init__(self, num_inputs, action_space):
        """Create the actor and critic MLPs with a mean and log-std head.

        Every linear layer is built without its own bias and is paired
        with an ``AddBias`` module of matching width.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of both networks and of the ``ObsNorm`` filter.
            action_space: Action space object; ``action_space.shape[0]``
                is used as the number of action dimensions.
        """
        super(MLPPolicy, self).__init__()

        hidden_size = 64
        num_actions = action_space.shape[0]

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        # Actor: two hidden layers, then the action-mean head and a
        # standalone bias used for the log standard deviation.
        self.a_fc1 = nn.Linear(num_inputs, hidden_size, bias=False)
        self.a_ab1 = AddBias(hidden_size)
        self.a_fc2 = nn.Linear(hidden_size, hidden_size, bias=False)
        self.a_ab2 = AddBias(hidden_size)
        self.a_fc_mean = nn.Linear(hidden_size, num_actions, bias=False)
        self.a_ab_mean = AddBias(num_actions)
        self.a_ab_logstd = AddBias(num_actions)

        # Critic: two hidden layers, then a single-output head.
        self.v_fc1 = nn.Linear(num_inputs, hidden_size, bias=False)
        self.v_ab1 = AddBias(hidden_size)
        self.v_fc2 = nn.Linear(hidden_size, hidden_size, bias=False)
        self.v_ab2 = AddBias(hidden_size)
        self.v_fc3 = nn.Linear(hidden_size, 1, bias=False)
        self.v_ab3 = AddBias(1)

        self.apply(weights_init_mlp)

        # Hidden layers keep whatever weights_init_mlp produced (no extra
        # gain is applied); only the mean head is scaled down by 100x.
        self.a_fc_mean.weight.data.mul_(0.01)

        self.train()
Ejemplo n.º 2
0
    def __init__(self, num_inputs, action_space, do_encode_mean=True):
        """Create separate actor and critic MLPs plus a latent model.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of both networks and of the ``ObsNorm`` filter.
            action_space: Action space object; ``action_space.shape[0]``
                is used as the number of action dimensions.
            do_encode_mean: Flag stored unchanged on
                ``self.do_encode_mean``; it is not read in this
                constructor.
        """
        super(MLPPolicySeparate, self).__init__()

        actor_hidden = 64
        critic_hidden = 32
        num_actions = action_space.shape[0]

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        # Actor: two hidden layers, a mean head, and a learnable log-std
        # parameter initialised to zeros.
        self.a_fc1 = nn.Linear(num_inputs, actor_hidden)
        self.a_fc2 = nn.Linear(actor_hidden, actor_hidden)
        self.a_fc_mean = nn.Linear(actor_hidden, num_actions)
        self.a_log_std = nn.Parameter(torch.zeros(1, num_actions))

        # Critic: two hidden layers and a single-output head.
        self.v_fc1 = nn.Linear(num_inputs, critic_hidden)
        self.v_fc2 = nn.Linear(critic_hidden, critic_hidden)
        self.v_fc3 = nn.Linear(critic_hidden, 1)

        # The latent model's input width is the critic hidden width plus
        # the number of action dimensions.
        self.latent_model = LatentModel(critic_hidden + num_actions,
                                        critic_hidden)

        self.apply(weights_init_mlp)

        self.do_encode_mean = do_encode_mean

        # Hidden layers keep whatever weights_init_mlp produced (no extra
        # gain is applied); only the mean head is scaled down by 100x.
        self.a_fc_mean.weight.data.mul_(0.01)

        self.train()
Ejemplo n.º 3
0
    def __init__(self, num_inputs, num_outputs):
        """Set up the two linear heads and the encoding filter.

        Args:
            num_inputs: Input width shared by both linear layers.
            num_outputs: Output width of ``fc1``.
        """
        super(LatentModel, self).__init__()

        # NOTE(review): the filter shape is fixed at (1, 32) and does not
        # follow num_outputs -- confirm this is intended for callers that
        # pass a different output width.
        enc_shape = (1, 32)
        self.enc_filter = ObsNorm(enc_shape, clip=10.0)

        self.fc1 = nn.Linear(num_inputs, num_outputs)
        # Single-output head used as the reward estimate.
        reward_dim = 1
        self.fcr = nn.Linear(num_inputs, reward_dim)

        self.apply(weights_init_mlp)
        self.train()
Ejemplo n.º 4
0
    def __init__(self, obs_shape, action_space):
        """Create separate convolutional actor and critic networks.

        Args:
            obs_shape: Observation shape. ``obs_shape[0]`` is used as the
                number of input channels and ``obs_shape[-1]`` selects the
                flattened sizes of the conv outputs; it must be 32, 48
                or 64.
            action_space: Action space object; ``action_space.shape[0]``
                is used as the number of action dimensions.

        Raises:
            ValueError: If ``obs_shape[-1]`` is not 32, 48 or 64.
        """
        super(CNNContinuousPolicySeparate, self).__init__()

        num_inputs = obs_shape[0]
        d = obs_shape[-1]
        self.extra_conv = False

        # Flattened conv output sizes as (actor, critic), keyed by the
        # last observation dimension.
        flat_sizes = {
            32: (16 * 8 * 8, 16 * 16 * 16),
            48: (16 * 12 * 12, 16 * 24 * 24),
            64: (16 * 8 * 8, 16 * 16 * 16),
        }
        if d not in flat_sizes:
            raise ValueError(
                "unsupported observation size %r; expected one of 32, 48, 64"
                % (d,))
        actor_flat, critic_flat = flat_sizes[d]
        num_actions = action_space.shape[0]

        # Actor: two stride-2 convs, plus a third one for inputs larger
        # than 48.
        self.actor_conv_reshape = actor_flat
        self.conv1_a = nn.Conv2d(num_inputs, 16, 4, stride=2, padding=1)
        self.conv2_a = nn.Conv2d(16, 16, 4, stride=2, padding=1)
        if d > 48:
            self.conv3_a = nn.Conv2d(16, 16, 4, stride=2, padding=1)
            self.extra_conv = True
        self.linear1_a = nn.Linear(self.actor_conv_reshape, 32)
        self.fc_mean_a = nn.Linear(32, num_actions)
        self.a_log_std = nn.Parameter(torch.zeros(1, num_actions))

        # Critic: one stride-2 conv, plus a second one when the actor got
        # its extra conv.
        self.critic_conv_reshape = critic_flat
        self.conv1_v = nn.Conv2d(num_inputs, 16, 4, stride=2, padding=1)
        if self.extra_conv:
            self.conv2_v = nn.Conv2d(16, 16, 4, stride=2, padding=1)
        self.linear1_v = nn.Linear(self.critic_conv_reshape, 32)
        self.enc_filter = ObsNorm((1, 32), clip=10.0)
        self.critic_linear_v = nn.Linear(32, 1)

        self.apply(weights_init)

        # NOTE(review): the optional layers conv3_a and conv2_v are not
        # rescaled here -- confirm whether that is intentional.
        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1_a, self.conv2_a, self.linear1_a,
                      self.conv1_v, self.linear1_v):
            layer.weight.data.mul_(relu_gain)

        self.train()
Ejemplo n.º 5
0
    def __init__(self, num_inputs, action_space):
        """Create separate 3-D convolutional actor and critic networks.

        Args:
            num_inputs: Accepted but not read by this constructor; the
                first convolutions always take one input channel.
            action_space: Action space object; ``action_space.shape[0]``
                is used as the number of action dimensions.
        """
        super(CNN3ContinuousPolicySeparate, self).__init__()

        # Stride and padding shared by the first two convs of each network.
        step = (1, 2, 2)
        front_pad = (1, 0, 0)
        no_pad = (0, 0, 0)
        last_kernel = (4, 3, 3)
        n_actions = action_space.shape[0]

        self.conv_reshape = 16 * 3 * 3

        # Actor network.
        self.conv1_a = nn.Conv3d(1, 32, 3, stride=step, padding=front_pad)
        self.conv2_a = nn.Conv3d(32, 32, 3, stride=step, padding=front_pad)
        self.conv3_a = nn.Conv3d(32, 16, last_kernel, stride=step,
                                 padding=no_pad)
        self.linear1_a = nn.Linear(self.conv_reshape, 64)
        self.fc_mean_a = nn.Linear(64, n_actions)
        self.a_log_std = nn.Parameter(torch.zeros(1, n_actions))

        # Critic network (same convolutional layout as the actor).
        self.conv1_v = nn.Conv3d(1, 32, 3, stride=step, padding=front_pad)
        self.conv2_v = nn.Conv3d(32, 32, 3, stride=step, padding=front_pad)
        self.conv3_v = nn.Conv3d(32, 16, last_kernel, stride=step,
                                 padding=no_pad)
        self.linear1_v = nn.Linear(self.conv_reshape, 64)
        self.enc_filter = ObsNorm((1, 64), clip=10.0)

        self.critic_linear_v = nn.Linear(64, 1)

        self.apply(weights_init)

        # NOTE(review): the gain is computed for 'tanh'; confirm this
        # matches the nonlinearity used in the forward pass.
        gain = nn.init.calculate_gain('tanh')
        scaled_layers = (
            self.conv1_a, self.conv2_a, self.conv3_a, self.linear1_a,
            self.conv1_v, self.conv2_v, self.conv3_v, self.linear1_v,
        )
        for layer in scaled_layers:
            layer.weight.data.mul_(gain)

        self.train()
Ejemplo n.º 6
0
    def __init__(self, num_inputs, action_space_shape):
        """Create the actor and critic MLPs with a categorical output.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of both networks and of the ``ObsNorm`` filter.
            action_space_shape: Number of outputs handed to
                ``Categorical``; also stored on ``self.action_space``.
        """
        super(MLPPolicy, self).__init__()

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        # This constructor has no ``action_space`` argument, so store the
        # value that was actually passed in instead of reading an
        # undefined name.
        # NOTE(review): confirm that readers of self.action_space accept
        # this value rather than a full action-space object.
        self.action_space = action_space_shape

        self.a_fc1 = nn.Linear(num_inputs, 64)
        self.a_fc2 = nn.Linear(64, 64)

        self.v_fc1 = nn.Linear(num_inputs, 64)
        self.v_fc2 = nn.Linear(64, 64)
        self.v_fc3 = nn.Linear(64, 1)

        num_outputs = action_space_shape
        self.dist = Categorical(64, num_outputs)

        self.train()
        self.reset_parameters()
Ejemplo n.º 7
0
    def __init__(self, num_inputs, action_space, do_encode_mean=True):
        """Create a shared-trunk actor-critic MLP plus a latent model.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of the trunk and of the ``ObsNorm`` filter.
            action_space: Action space object; ``action_space.shape[0]``
                is used as the number of action dimensions.
            do_encode_mean: Accepted for signature compatibility; it is
                not read or stored by this constructor.
        """
        print("Making shared MLP actor critic!")
        super(MLPPolicy, self).__init__()

        hidden_size = 64
        num_actions = action_space.shape[0]

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        # Shared trunk followed by the mean head, a learnable log-std
        # parameter initialised to zeros, and the value head.
        self.fc1 = nn.Linear(num_inputs, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc_mean = nn.Linear(hidden_size, num_actions)
        self.log_std = nn.Parameter(torch.zeros(1, num_actions))
        self.fc_val = nn.Linear(hidden_size, 1)
        # The latent model's input width is the hidden width plus the
        # number of action dimensions.
        self.latent_model = LatentModel(hidden_size + num_actions,
                                        hidden_size)

        self.apply(weights_init_mlp)

        # Only the mean head is rescaled after initialisation (100x
        # smaller weights).
        self.fc_mean.weight.data.mul_(0.01)

        self.train()
Ejemplo n.º 8
0
    def __init__(self, num_inputs, action_space):
        """Create the actor and critic MLPs and an output distribution.

        Every linear layer is built without its own bias and is paired
        with an ``AddBias`` module of matching width. The distribution
        head is chosen from the class name of ``action_space``.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of both networks and of the ``ObsNorm`` filter.
            action_space: Action space object. A class named ``Discrete``
                must provide ``n``; a class named ``Box`` must provide
                ``shape``.

        Raises:
            NotImplementedError: If the class of ``action_space`` is
                named neither ``Discrete`` nor ``Box``.
        """
        super(MLPPolicy, self).__init__()

        hidden_size = 64

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        # Actor trunk.
        self.a_fc1 = nn.Linear(num_inputs, hidden_size, bias=False)
        self.a_ab1 = AddBias(hidden_size)
        self.a_fc2 = nn.Linear(hidden_size, hidden_size, bias=False)
        self.a_ab2 = AddBias(hidden_size)

        # Critic trunk and single-output head.
        self.v_fc1 = nn.Linear(num_inputs, hidden_size, bias=False)
        self.v_ab1 = AddBias(hidden_size)
        self.v_fc2 = nn.Linear(hidden_size, hidden_size, bias=False)
        self.v_ab2 = AddBias(hidden_size)
        self.v_fc3 = nn.Linear(hidden_size, 1, bias=False)
        self.v_ab3 = AddBias(1)

        # Dispatch on the class name so the space classes themselves do
        # not need to be imported here.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(hidden_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(hidden_size, num_outputs)
        else:
            raise NotImplementedError

        self.apply(weights_init_mlp)

        # Must run after weights_init_mlp: shrink the Gaussian mean head's
        # weights by 100x. Hidden layers get no extra gain.
        if space_kind == "Box":
            self.dist.fc_mean.weight.data.mul_(0.01)

        self.train()
Ejemplo n.º 9
0
    def __init__(self, num_inputs, action_space):
        """Create the actor and critic MLPs and an output distribution.

        The distribution head is chosen from the class name of
        ``action_space``.

        Args:
            num_inputs: Size of the observation vector; used as the input
                width of both networks and of the ``ObsNorm`` filter.
            action_space: Action space object. A class named ``Discrete``
                must provide ``n``; a class named ``Box`` must provide
                ``shape``.

        Raises:
            NotImplementedError: If the class of ``action_space`` is
                named neither ``Discrete`` nor ``Box``.
        """
        super(MLPPolicy, self).__init__()

        width = 64

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        # Actor trunk.
        self.a_fc1 = nn.Linear(num_inputs, width)
        self.a_fc2 = nn.Linear(width, width)

        # Critic trunk and single-output head.
        self.v_fc1 = nn.Linear(num_inputs, width)
        self.v_fc2 = nn.Linear(width, width)
        self.v_fc3 = nn.Linear(width, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(width, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(width, action_space.shape[0])
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()