コード例 #1
0
class CNNPolicy(nn.Module):
    """Convolutional actor-critic policy with extra state-value (V) and
    action-value (Q) heads.

    A three-layer convolutional encoder followed by a 512-unit linear layer
    produces a feature vector that feeds:

    * ``critic_linear`` - scalar value estimate returned by ``forward``;
    * ``V_linear_1/2``  - a separate state-value head;
    * ``Q_linear_1/2``  - an action-value head fed with features + action;
    * ``dist``          - the action distribution (project-level
      ``Categorical`` or ``DiagGaussian``).

    The hard-coded flatten size ``32 * 7 * 7`` matches the encoder output
    for 84x84 input frames.
    """

    def __init__(self, num_inputs, action_space):
        """Build the network.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: object whose class is named ``Discrete`` (must
                expose ``.n``) or ``Box`` (must expose ``.shape``).

        Raises:
            NotImplementedError: for any other action-space class.
        """
        super(CNNPolicy, self).__init__()

        # Resolve the action width before building any layer: the Q head
        # needs it, and reading `action_space.n` unconditionally would
        # crash for a Box space, which has no `.n`.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            dist_cls = Categorical
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            dist_cls = DiagGaussian
        else:
            raise NotImplementedError

        # Layers are registered in the same order as before so that
        # parameter ordering (and therefore optimizer state) is unchanged.
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear = nn.Linear(512, 1)

        self.V_linear_1 = nn.Linear(512, 20)
        self.V_linear_2 = nn.Linear(20, 1)
        self.Q_linear_1 = nn.Linear(512 + num_outputs, 20)
        self.Q_linear_2 = nn.Linear(20, 1)

        self.dist = dist_cls(512, num_outputs)

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights.

        Applies the project-level ``weights_init`` to every submodule, then
        scales the encoder weights by the ReLU gain. For a ``DiagGaussian``
        distribution the mean layer's weights are shrunk by 0.01.
        """
        self.apply(weights_init)

        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(relu_gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Map a batch of frames to 512-d pre-activation features.

        ``inputs`` is divided by 255 before the first convolution
        (presumably raw 0-255 pixel values - confirm with the caller).
        No non-linearity is applied after ``linear1``; the heads apply
        their own ReLU.
        """
        x = F.relu(self.conv1(inputs / 255.0))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        x = x.view(-1, 32 * 7 * 7)

        return self.linear1(x)

    def predict_for_action(self, inputs):
        """Return the ReLU-activated features consumed by ``self.dist``."""
        return F.relu(inputs)

    def predict_for_value(self, inputs):
        """Return the scalar value estimate for encoded features."""
        return self.critic_linear(F.relu(inputs))

    def forward(self, inputs):
        """Encode ``inputs`` and return ``(value, action_features)``."""
        x = self.encode(inputs)
        for_action = self.predict_for_action(x)
        for_value = self.predict_for_value(x)

        return for_value, for_action

    # for Q
    def predict_value_with_action(self, inputs, action):
        """Q head: estimate the value of ``action`` given encoded features.

        Args:
            inputs: encoded features, as produced by ``encode``.
            action: action representation with the same batch size; its
                width must match the one ``Q_linear_1`` was built with.

        Returns:
            Tensor with one value per batch row.
        """
        action = Variable(action)
        # Follow the device of the features instead of forcing CUDA, so
        # the head also works on CPU-only runs.
        if inputs.is_cuda:
            action = action.cuda()

        joined = torch.cat((inputs, action), 1)

        x = F.relu(joined)
        x = F.relu(self.Q_linear_1(x))
        return self.Q_linear_2(x)

    # for V
    def predict_value_without_action(self, inputs):
        """V head: estimate the state value from encoded features."""
        x = F.relu(inputs)
        x = F.relu(self.V_linear_1(x))
        return self.V_linear_2(x)

    def get_V_and_Q(self, inputs, actions):
        """Encode ``inputs`` once and return ``(V, Q)`` for ``actions``."""
        x = self.encode(inputs)
        V = self.predict_value_without_action(x)
        Q = self.predict_value_with_action(x, actions)

        return V, Q

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` for the given frames."""
        x = self.encode(inputs)
        for_action = self.predict_for_action(x)

        return self.dist.action_probs(for_action)

    def act(self, inputs, deterministic=False):
        """Run the policy on ``inputs``.

        Returns:
            ``(value, action, action_log_probs, dist_entropy)`` where the
            last three come from ``self.dist.sample2``.
        """
        value, x_action = self(inputs)

        action, action_log_probs, dist_entropy = self.dist.sample2(
            x_action, deterministic=deterministic)

        return value, action, action_log_probs, dist_entropy
コード例 #2
0
class CNNPolicy(nn.Module):
    """Convolutional actor-critic policy with a swappable activation.

    Three convolutions and a 512-unit linear layer form the encoder. The
    same activation function (``self.act_func``, leaky ReLU here) is used
    after every convolution and in front of both heads. The flatten size
    ``32 * 7 * 7`` matches the encoder output for 84x84 input frames.
    """

    def __init__(self, num_inputs, action_space):
        """Build the layers and the action distribution.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: object whose class is named ``Discrete`` (uses
                ``.n``) or ``Box`` (uses ``.shape[0]``).

        Raises:
            NotImplementedError: for any other action-space class.
        """
        super(CNNPolicy, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        # Single switch for the non-linearity used throughout the network.
        self.act_func = F.leaky_relu

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear = nn.Linear(512, 1)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            self.dist = Categorical(512, action_space.n)
        elif space_name == "Box":
            self.dist = DiagGaussian(512, action_space.shape[0])
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights.

        Runs the project-level ``weights_init`` over every submodule and
        then rescales the encoder weights.
        """
        self.apply(weights_init)

        # NOTE(review): the ReLU gain is applied although the activation
        # is leaky ReLU - confirm this is intentional.
        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Return 512-d pre-activation features for a batch of frames.

        The input is divided by 255 before the first convolution.
        """
        hidden = inputs / 255.0
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.act_func(conv(hidden))

        flat = hidden.view(-1, 32 * 7 * 7)
        return self.linear1(flat)

    def predict_for_action(self, inputs):
        """Activate encoded features for the action distribution."""
        return self.act_func(inputs)

    def predict_for_value(self, inputs):
        """Activate encoded features and map them to a scalar value."""
        activated = self.act_func(inputs)
        return self.critic_linear(activated)

    def forward(self, inputs):
        """Return ``(value, action_features)`` for a batch of frames."""
        encoded = self.encode(inputs)
        action_features = self.predict_for_action(encoded)
        value = self.predict_for_value(encoded)
        return value, action_features

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` for a batch of frames."""
        action_features = self.predict_for_action(self.encode(inputs))
        return self.dist.action_probs(action_features)

    def act(self, inputs, deterministic=False):
        """Run the policy.

        Returns:
            ``(value, action, action_log_probs, dist_entropy)``; the last
            three are whatever ``self.dist.sample2`` returns.
        """
        value, action_features = self(inputs)
        sampled = self.dist.sample2(
            action_features, deterministic=deterministic)
        action, action_log_probs, dist_entropy = sampled
        return value, action, action_log_probs, dist_entropy
コード例 #3
0
ファイル: model.py プロジェクト: rfarouni/Other_Code
class CNNPolicy2(FFPolicy):
    """Convolutional actor-critic policy with separate 200-unit hidden
    layers for the actor and the critic.

    A shared trunk (three convolutions + a 512-unit linear layer, all
    followed by ReLU) feeds ``actor_linear1`` and the two-layer critic.
    The flatten size ``32 * 7 * 7`` matches the trunk output for 84x84
    input frames. Behaviour inherited from ``FFPolicy`` is not visible in
    this file.
    """

    def __init__(self, num_inputs, action_space):
        """Build the layers and the action distribution.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: object whose class is named ``Discrete`` (uses
                ``.n``) or ``Box`` (uses ``.shape[0]``).

        Raises:
            NotImplementedError: for any other action-space class.
        """
        super(CNNPolicy2, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear1 = nn.Linear(512, 200)
        self.critic_linear2 = nn.Linear(200, 1)

        self.actor_linear1 = nn.Linear(512, 200)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            self.dist = Categorical(200, action_space.n)
        elif space_name == "Box":
            self.dist = DiagGaussian(200, action_space.shape[0])
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights.

        Runs the project-level ``weights_init`` over every submodule, then
        scales the trunk weights by the ReLU gain. For a ``DiagGaussian``
        distribution the mean layer's weights are shrunk by 0.01.
        """
        self.apply(weights_init)

        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def _conv_features(self, inputs):
        """Shared trunk: frames -> ReLU-activated features of width 512."""
        hidden = inputs / 255.0
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = F.relu(conv(hidden))

        flat = hidden.view(-1, 32 * 7 * 7)
        return F.relu(self.linear1(flat))  # [B,512]

    def forward(self, inputs):
        """Return ``(value, actor_features)`` for a batch of frames."""
        shared = self._conv_features(inputs)

        actor_hidden = F.relu(self.actor_linear1(shared))

        critic_hidden = F.relu(self.critic_linear1(shared))
        value = self.critic_linear2(critic_hidden)

        return value, actor_hidden

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` for a batch of frames.

        Only the actor branch is evaluated; the critic is skipped.
        """
        shared = self._conv_features(inputs)
        actor_hidden = F.relu(self.actor_linear1(shared))

        return self.dist.action_probs(actor_hidden)
コード例 #4
0
class CNNPolicy(nn.Module):
    """Convolutional actor-critic policy with an optional next-frame
    decoder.

    The encoder (three convolutions + a 512-unit linear layer) feeds the
    critic head and the action distribution. When built with
    ``with_state_decoder=True`` the model also owns a small
    transposed-convolution decoder used by ``predict_next_state`` and
    ``predict_next_state2``.

    Weights keep PyTorch's default initialisation; no custom reset is
    applied by this class. The flatten size ``32 * 7 * 7`` matches the
    encoder output for 84x84 input frames.
    """

    def __init__(self, num_inputs, action_space, with_state_decoder=False):
        """Build the network.

        Args:
            num_inputs: number of stacked frames (input channels).
            action_space: object whose class is named ``Discrete`` (uses
                ``.n``) or ``Box`` (uses ``.shape[0]``).
            with_state_decoder: when True, also create the layers needed
                by the ``predict_next_state*`` methods. Defaults to False
                so the parameter set of existing models is unchanged.

        Raises:
            NotImplementedError: for any other action-space class.
        """
        super(CNNPolicy, self).__init__()

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError
        self.num_inputs = num_inputs  # number of stacked frames
        self.num_outputs = num_outputs  # action size

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear = nn.Linear(512, 1)

        if with_state_decoder:
            # Mirror of the encoder: 7x7 -> 9x9 -> 20x20 -> 84x84, with a
            # single output channel (one predicted frame).
            self.state_pred_linear_1 = nn.Linear(512 + num_outputs,
                                                 32 * 7 * 7)
            self.deconv1 = nn.ConvTranspose2d(
                in_channels=32, out_channels=64, kernel_size=3, stride=1)
            self.deconv2 = nn.ConvTranspose2d(
                in_channels=64, out_channels=32, kernel_size=4, stride=2)
            self.deconv3 = nn.ConvTranspose2d(
                in_channels=32, out_channels=1, kernel_size=8, stride=4)

    def encode(self, inputs):
        """Return 512-d pre-activation features for a batch of frames.

        The input is divided by 255 before the first convolution. The
        result is also cached on ``self.z`` so ``predict_next_state2`` can
        reuse it without a second encoder pass. The cached tensor keeps
        its autograd graph.
        """
        x = F.relu(self.conv1(inputs / 255.0))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        x = x.view(-1, 32 * 7 * 7)

        x = self.linear1(x)

        self.z = x

        return x

    def predict_for_action(self, inputs):
        """Return the ReLU-activated features consumed by ``self.dist``."""
        return F.relu(inputs)

    def predict_for_value(self, inputs):
        """Return the scalar value estimate for encoded features."""
        return self.critic_linear(F.relu(inputs))

    def forward(self, inputs):
        """Encode ``inputs`` and return ``(value, action_features)``."""
        x = self.encode(inputs)
        for_action = self.predict_for_action(x)
        for_value = self.predict_for_value(x)

        return for_value, for_action

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` for the given frames."""
        x = self.encode(inputs)
        for_action = self.predict_for_action(x)

        return self.dist.action_probs(for_action)

    def act(self, inputs, deterministic=False):
        """Run the policy on ``inputs``.

        Returns:
            ``(value, action, action_log_probs, dist_entropy)`` where the
            last three come from ``self.dist.sample2``.
        """
        value, x_action = self.forward(inputs)

        action, action_log_probs, dist_entropy = self.dist.sample2(
            x_action, deterministic=deterministic)

        return value, action, action_log_probs, dist_entropy

    def _one_hot_actions(self, action, like):
        """One-hot encode ``action`` ([P,1] indices) on the device of
        ``like``. Returns a float tensor of shape [P, num_outputs]."""
        one_hot = torch.zeros(action.size()[0], self.num_outputs)
        one_hot.scatter_(1, action.data.cpu(), 1)  # [P,A]
        one_hot = Variable(one_hot.float())
        # Follow the features' device rather than forcing CUDA so the
        # decoder also runs on CPU.
        if like.is_cuda:
            one_hot = one_hot.cuda()
        return one_hot

    def _decode_next_frame(self, z, action):
        """Decode features ``z`` plus ``action`` into a predicted frame."""
        if getattr(self, 'state_pred_linear_1', None) is None:
            raise AttributeError(
                "state decoder layers are missing; construct CNNPolicy "
                "with with_state_decoder=True to use next-state prediction")

        # Concatenate state features and action: [P,Z+A].
        z = torch.cat((z, self._one_hot_actions(action, z)), dim=1)

        z = self.state_pred_linear_1(z)
        z = z.view(-1, 32, 7, 7)

        z = F.relu(self.deconv1(z))
        z = F.relu(self.deconv2(z))
        z = self.deconv3(z)
        # Undo the /255 input scaling applied by encode().
        return z * 255.

    def predict_next_state(self, state, action):
        """Predict the next frame from ``state`` and ``action``.

        Args:
            state: batch of frames accepted by ``encode``.
            action: [P,1] integer action indices (one-hot encoded
                internally, so this is meant for discrete actions).

        Returns:
            Tensor of shape [P,1,84,84] scaled back by 255.

        Raises:
            AttributeError: if the model was built without the decoder.
        """
        z = self.encode(state)  # [P,Z]
        return self._decode_next_frame(z, action)

    def predict_next_state2(self, state, action):
        """Like ``predict_next_state`` but reuses the features cached by
        the most recent ``encode`` call instead of re-encoding ``state``.

        ``state`` is accepted for signature parity and is not read.

        Raises:
            AttributeError: if ``encode`` has not been called yet, or if
                the model was built without the decoder.
        """
        z = getattr(self, 'z', None)
        if z is None:
            raise AttributeError(
                "no cached encoding available; call encode()/forward() "
                "before predict_next_state2()")
        return self._decode_next_frame(z, action)
コード例 #5
0
ファイル: model.py プロジェクト: chriscremer/Other_Code
class CNNPolicy_with_var(nn.Module):
    """Convolutional actor-critic policy whose critic outputs two scalars
    per state: ``critic_linear_mean`` and ``critic_linear_logvar``.

    A shared trunk (three convolutions + a 512-unit linear layer, all
    followed by ReLU) feeds a 200-unit actor layer and a 200-unit critic
    layer. The flatten size ``32 * 7 * 7`` matches the trunk output for
    84x84 input frames.
    """

    def __init__(self, num_inputs, action_space):
        """Build the layers and the action distribution.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: object whose class is named ``Discrete`` (uses
                ``.n``) or ``Box`` (uses ``.shape[0]``).

        Raises:
            NotImplementedError: for any other action-space class.
        """
        super(CNNPolicy_with_var, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        self.linear1 = nn.Linear(32 * 7 * 7, 512)

        self.critic_linear1 = nn.Linear(512, 200)
        self.critic_linear_mean = nn.Linear(200, 1)
        self.critic_linear_logvar = nn.Linear(200, 1)

        self.actor_linear1 = nn.Linear(512, 200)

        space_name = action_space.__class__.__name__
        if space_name == "Discrete":
            self.dist = Categorical(200, action_space.n)
        elif space_name == "Box":
            self.dist = DiagGaussian(200, action_space.shape[0])
        else:
            raise NotImplementedError

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all weights.

        Runs the project-level ``weights_init`` over every submodule, then
        scales the trunk weights by the ReLU gain. For a ``DiagGaussian``
        distribution the mean layer's weights are shrunk by 0.01.
        """
        self.apply(weights_init)

        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def _actor_input(self, inputs):
        """Shared trunk: frames -> ReLU-activated features of width 512."""
        hidden = inputs / 255.0
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = F.relu(conv(hidden))

        flat = hidden.view(-1, 32 * 7 * 7)
        return F.relu(self.linear1(flat))  # [B,512]

    def forward(self, inputs):
        """Return ``(value_mean, value_logvar, actor_features)``."""
        shared = self._actor_input(inputs)

        actor_hidden = F.relu(self.actor_linear1(shared))

        critic_hidden = F.relu(self.critic_linear1(shared))
        value_mean = self.critic_linear_mean(critic_hidden)
        value_logvar = self.critic_linear_logvar(critic_hidden)

        return value_mean, value_logvar, actor_hidden

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` for a batch of frames.

        Only the actor branch is evaluated; the critic is skipped.
        """
        shared = self._actor_input(inputs)
        actor_hidden = F.relu(self.actor_linear1(shared))

        return self.dist.action_probs(actor_hidden)

    def act(self, inputs, deterministic=False):
        """Sample an action.

        Returns:
            ``(value_mean, value_logvar, action)`` where ``action`` is
            whatever ``self.dist.sample`` returns.
        """
        value_mean, value_logvar, actor_hidden = self.forward(inputs)
        action = self.dist.sample(actor_hidden, deterministic=deterministic)
        return value_mean, value_logvar, action

    def evaluate_actions(self, inputs, actions):
        """Score given ``actions`` under the current policy.

        Returns:
            ``(value_mean, value_logvar, action_log_probs, dist_entropy)``;
            the last two come from ``self.dist.evaluate_actions``.
        """
        value_mean, value_logvar, actor_hidden = self.forward(inputs)
        action_log_probs, dist_entropy = self.dist.evaluate_actions(
            actor_hidden, actions)
        return value_mean, value_logvar, action_log_probs, dist_entropy