Example #1
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        self.fc_mean = init_(nn.Linear(num_inputs, num_outputs, bias=True))
        # self.fc_mean.weight.data = torch.from_numpy(MIRROR_FACT).float()
        self.logstd = AddBias(torch.zeros(num_outputs))
        self.synlogstd = AddBias(torch.ones(num_inputs) * 0)
        self.opto_probs = AddBias(torch.ones(1) * 0.01)
        self.syn_probs = 1.0
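
Note: every example on this page relies on an AddBias helper module that is not shown. A minimal sketch, assuming it simply wraps a learnable bias tensor and adds it to its input (a reconstruction, not code from any of the quoted projects):

import torch
import torch.nn as nn

class AddBias(nn.Module):
    """Learnable bias that is added to the input (sketch)."""
    def __init__(self, bias):
        super(AddBias, self).__init__()
        # bias is expected to be a 1-D tensor, e.g. torch.zeros(num_outputs)
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # Broadcast over the batch dimension (2-D input) or over the spatial
        # dimensions (4-D input from a convolutional layer)
        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            bias = self._bias.t().view(1, -1, 1, 1)
        return x + bias

Some of the later examples instead pass an integer size (e.g. AddBias(64)); those snippets apparently come from a variant that allocates the zero bias tensor itself.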
Example #2
    def __init__(self, num_inputs, num_outputs):
        super(GaussianPolicy, self).__init__()

        self.fc_mean = orthogonal_init_(nn.Linear(num_inputs, num_outputs))
        self.logstd = AddBias(torch.zeros(num_outputs))

        self.saved_log_probs = []
        self.rewards = []
Example #3
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
        self.logstd = AddBias(torch.zeros(num_outputs))
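
Note: the init_ lambda used in Examples #1, #3, #5, #8 and #9 combines two helpers, init and init_normc_, that are also not shown. A plausible sketch, assuming init applies a weight initializer and a bias initializer to a module and init_normc_ performs row-normalized Gaussian initialization (again a reconstruction, not verified against the original sources):

import torch

def init(module, weight_init, bias_init, gain=1):
    # Apply the supplied initializers in place and return the module,
    # so that calls like init_(nn.Linear(...)) can be chained
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module

def init_normc_(weight, gain=1):
    # Sample N(0, 1), then rescale each row of the weight matrix so that
    # its L2 norm equals the given gain
    weight.normal_(0, 1)
    weight *= gain / torch.sqrt(weight.pow(2).sum(1, keepdim=True))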
Example #4
    def __init__(self, num_inputs, action_space):
        super(MLPPolicy, self).__init__()

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        self.a_fc1 = nn.Linear(num_inputs, 64, bias=False)
        self.a_ab1 = AddBias(64)
        self.a_fc2 = nn.Linear(64, 64, bias=False)
        self.a_ab2 = AddBias(64)
        self.a_fc_mean = nn.Linear(64, action_space.shape[0], bias=False)
        self.a_ab_mean = AddBias(action_space.shape[0])
        self.a_ab_logstd = AddBias(action_space.shape[0])

        self.v_fc1 = nn.Linear(num_inputs, 64, bias=False)
        self.v_ab1 = AddBias(64)
        self.v_fc2 = nn.Linear(64, 64, bias=False)
        self.v_ab2 = AddBias(64)
        self.v_fc3 = nn.Linear(64, 1, bias=False)
        self.v_ab3 = AddBias(1)

        self.apply(weights_init_mlp)

        tanh_gain = nn.init.calculate_gain('tanh')
        #self.a_fc1.weight.data.mul_(tanh_gain)
        #self.a_fc2.weight.data.mul_(tanh_gain)
        self.a_fc_mean.weight.data.mul_(0.01)
        #self.v_fc1.weight.data.mul_(tanh_gain)
        #self.v_fc2.weight.data.mul_(tanh_gain)

        self.train()
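
Note: Example #4 creates every linear layer with bias=False and pairs it with its own AddBias module, so the bias lives in a separate parameter. The forward pass is not part of the snippet; a hedged sketch of how the actor branch might be evaluated, assuming tanh activations (the commented-out tanh_gain lines suggest tanh) and using a made-up method name:

    def actor_features(self, inputs):
        # Bias-free linear layer, then the matching AddBias, then tanh
        x = torch.tanh(self.a_ab1(self.a_fc1(inputs)))
        x = torch.tanh(self.a_ab2(self.a_fc2(x)))
        # Action-mean head; a_ab_logstd would supply the log-std
        return self.a_ab_mean(self.a_fc_mean(x))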
Example #5
    def __init__(self,
                 state_shape,
                 action_dim,
                 lstm_size,
                 use_cuda,
                 use_lstm,
                 name=''):
        super(MLPBase, self).__init__()

        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        self.state_shape = state_shape
        self.action_dim = action_dim
        self.lstm_size = lstm_size
        self.use_cuda = use_cuda
        self.use_lstm = use_lstm
        self.stddev = 1
        # self.conv1 = nn.Conv2d(self.state_shape[0], 64, kernel_size=9, stride=1, padding = 4)
        # # self.bn1 = nn.BatchNorm2d(64)
        # self.conv2 = nn.Conv2d(64, 64, kernel_size=7, stride=1, padding = 3)
        # # self.bn2 = nn.BatchNorm2d(64)
        # self.conv3 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding = 2)
        # # self.bn3 = nn.BatchNorm2d(128)
        # self.conv4 = nn.Conv2d(128, 128, kernel_size=5, stride=1, padding = 2)
        # # self.bn4 = nn.BatchNorm2d(128)
        # self.conv5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding = 1)
        # # self.bn5 = nn.BatchNorm2d(128)
        # self.conv6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding = 1)
        # self.max4 = nn.MaxPool2d(2)
        # # self.bn6 = nn.BatchNorm2d(128)

        self.hidden11 = init_(nn.Linear(self.state_shape[3], 512))
        self.hidden12 = init_(nn.Linear(self.state_shape[4], 512))
        # self.hidden13 = init_(nn.Linear(self.state_shape[0], 256))
        # self.hidden14 = init_(nn.Linear(self.state_shape[3], 256))
        self.hidden2 = init_(nn.Linear(1024, 512))
        if self.use_lstm:
            self.lstm = nn.LSTMCell(512, self.lstm_size)
            self.action_head = init_(nn.Linear(self.lstm_size,
                                               self.action_dim))
            self.action_sigma = init_(
                nn.Linear(self.lstm_size, self.action_dim))
            self.value_head = init_(nn.Linear(self.lstm_size, 1))

            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
        else:
            self.action_head = init_(nn.Linear(512, self.action_dim))
            self.action_sigma = init_(nn.Linear(512, self.action_dim))
            self.value_head = init_(nn.Linear(512, 1))

        self.logstd = AddBias(torch.zeros(action_dim))
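
Note: in Example #5 the two 512-unit input branches feed hidden2, which expects 1024 inputs, so their outputs are presumably concatenated in the forward pass. A hedged sketch of that step; the method name, the variable names x1/x2 and the ReLU activations are assumptions:

    def encode(self, x1, x2):
        # x1 and x2 are the input parts matching state_shape[3] and state_shape[4]
        h1 = torch.relu(self.hidden11(x1))
        h2 = torch.relu(self.hidden12(x2))
        # Concatenate the two 512-unit branches into the 1024 inputs of hidden2
        return torch.relu(self.hidden2(torch.cat([h1, h2], dim=1)))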
Example #6
    def __init__(self, num_inputs, action_space):
        super(CNNPolicy, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4, bias=False)
        self.ab1 = AddBias(32)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2, bias=False)
        self.ab2 = AddBias(64)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1, bias=False)
        self.ab3 = AddBias(32)

        self.linear1 = nn.Linear(32 * 7 * 7, 512, bias=False)
        self.ab_fc1 = AddBias(512)

        self.critic_linear = nn.Linear(512, 1, bias=False)
        self.ab_fc2 = AddBias(1)

        num_outputs = action_space.n
        self.actor_linear = nn.Linear(512, num_outputs, bias=False)
        self.ab_fc3 = AddBias(num_outputs)

        self.apply(weights_init)

        relu_gain = nn.init.calculate_gain('relu')
        self.conv1.weight.data.mul_(relu_gain)
        self.conv2.weight.data.mul_(relu_gain)
        self.conv3.weight.data.mul_(relu_gain)
        self.linear1.weight.data.mul_(relu_gain)

        self.train()
Example #7
    def __init__(self, num_inputs, action_space):
        super(MLPPolicy, self).__init__()

        self.obs_filter = ObsNorm((1, num_inputs), clip=5)
        self.action_space = action_space

        self.a_fc1 = nn.Linear(num_inputs, 64, bias=False)
        self.a_ab1 = AddBias(64)
        self.a_fc2 = nn.Linear(64, 64, bias=False)
        self.a_ab2 = AddBias(64)

        self.v_fc1 = nn.Linear(num_inputs, 64, bias=False)
        self.v_ab1 = AddBias(64)
        self.v_fc2 = nn.Linear(64, 64, bias=False)
        self.v_ab2 = AddBias(64)
        self.v_fc3 = nn.Linear(64, 1, bias=False)
        self.v_ab3 = AddBias(1)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(64, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(64, num_outputs)
        else:
            raise NotImplementedError

        self.apply(weights_init_mlp)

        tanh_gain = nn.init.calculate_gain('tanh')
        #self.a_fc1.weight.data.mul_(tanh_gain)
        #self.a_fc2.weight.data.mul_(tanh_gain)
        #self.v_fc1.weight.data.mul_(tanh_gain)
        #self.v_fc2.weight.data.mul_(tanh_gain)

        if action_space.__class__.__name__ == "Box":
            self.dist.fc_mean.weight.data.mul_(0.01)

        self.train()
Example #8
    def __init__(self, num_inputs, num_outputs, zero_init=False):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        self.fc_mean = nn.Linear(num_inputs, num_outputs)
        if not zero_init:
            self.fc_mean = init_(self.fc_mean)
        else:
            self.fc_mean.weight.data.fill_(0.0)
            self.fc_mean.bias.data.fill_(0.0)

        self.logstd = AddBias(torch.zeros(num_outputs))
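
Note: none of the DiagGaussian examples include a forward pass. A plausible reconstruction is to take the mean from fc_mean and obtain a state-independent log standard deviation by running a zero tensor through the logstd AddBias:

    def forward(self, x):
        action_mean = self.fc_mean(x)
        # AddBias applied to zeros returns just the learned log-std,
        # broadcast to the batch shape
        action_logstd = self.logstd(torch.zeros_like(action_mean))
        return torch.distributions.Normal(action_mean, action_logstd.exp())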
Example #9
    def __init__(self, num_inputs, num_outputs, num_agents, sigmoid):
        super(DiagGaussian, self).__init__()

        # An instance (i.e., with specific weight and bias initializing function) of the function "init"
        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        # Number of inputs and outputs for each agent
        self.num_agents = num_agents
        self.atom_num_inputs = num_inputs // num_agents
        self.atom_num_outputs = num_outputs // num_agents
        self.fc_mean = init_(
            nn.Linear(self.atom_num_inputs, self.atom_num_outputs))
        self.logstd = AddBias(torch.zeros(self.atom_num_outputs))
        self.sigmoid = sigmoid
Example #10
    def __init__(self, num_inputs, action_space):
        super(CNNPolicy, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4, bias=False)
        self.ab1 = AddBias(32)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2, bias=False)
        self.ab2 = AddBias(64)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1, bias=False)
        self.ab3 = AddBias(32)

        self.linear1 = nn.Linear(32 * 7 * 7, 512, bias=False)
        self.ab_fc1 = AddBias(512)

        self.critic_linear = nn.Linear(512, 1, bias=False)
        self.ab_fc2 = AddBias(1)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError

        self.apply(weights_init)

        relu_gain = nn.init.calculate_gain('relu')
        self.conv1.weight.data.mul_(relu_gain)
        self.conv2.weight.data.mul_(relu_gain)
        self.conv3.weight.data.mul_(relu_gain)
        self.linear1.weight.data.mul_(relu_gain)

        if action_space.__class__.__name__ == "Box":
            self.dist.fc_mean.weight.data.mul_(0.01)

        self.train()
Example #11
    def __init__(self,
                 state_shape,
                 action_dim,
                 lstm_size,
                 use_cuda,
                 use_lstm,
                 name=''):
        super(MLPBase, self).__init__()

        init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
            x, 0))

        self.state_shape = state_shape
        self.action_dim = action_dim
        self.lstm_size = lstm_size
        self.use_cuda = use_cuda
        self.use_lstm = use_lstm
        self.stddev = 1

        self.hidden1 = init_(
            nn.Linear(self.state_shape[1] + self.state_shape[2], 512))
        # self.hidden12 = init_(nn.Linear(self.state_shape[4], 512))
        # self.hidden13 = init_(nn.Linear(self.state_shape[0], 256))
        # self.hidden14 = init_(nn.Linear(self.state_shape[3], 256))
        self.hidden2 = init_(nn.Linear(512, 512))
        if self.use_lstm:
            self.lstm = nn.LSTMCell(512, self.lstm_size)
            self.action_head = init_(nn.Linear(self.lstm_size,
                                               self.action_dim))
            self.action_sigma = init_(
                nn.Linear(self.lstm_size, self.action_dim))
            self.value_head = init_(nn.Linear(self.lstm_size, 1))

            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
        else:
            self.action_head = init_(nn.Linear(512, self.action_dim))
            self.action_sigma = init_(nn.Linear(512, self.action_dim))
            self.value_head = init_(nn.Linear(512, 1))

        self.logstd = AddBias(torch.zeros(action_dim))
Example #12
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()
        self.fc_mean = nn.Linear(num_inputs, num_outputs)
        self.logstd = AddBias(torch.zeros(num_outputs))
Example #13
    def __init__(self, num_inputs, num_outputs):  # input: 64, out: 2
        super(DiagGaussian, self).__init__()
        # the actor's final layer is here
        self.fc_mean = nn.Linear(num_inputs, num_outputs)
        self.logstd = AddBias(torch.zeros(num_outputs))
Example #14
    def __init__(self, module):
        super(SplitBias, self).__init__()
        self.module = module
        self.add_bias = AddBias(module.bias.data)
        self.module.bias = None
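
Note: SplitBias moves the wrapped module's bias into a separate AddBias parameter; its forward pass (not shown) would presumably run the now bias-free module and add the bias back. A hedged sketch plus a usage line:

    def forward(self, x):
        x = self.module(x)
        x = self.add_bias(x)
        return x

# Usage sketch: wrap an ordinary layer so its bias becomes a standalone parameter
layer = SplitBias(nn.Linear(64, 2))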
Example #15
    def __init__(self, num_outputs):
        super(DiagGaussianNoFC, self).__init__()

        self.logstd = AddBias(torch.zeros(num_outputs))
Example #16
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()
        # note: this snippet hard-codes the input size to 64 instead of using num_inputs
        self.fc_mean = nn.Linear(64, num_outputs, bias=False)
        self.ab_mean = AddBias(num_outputs)
        self.ab_logstd = AddBias(num_outputs)
Example #17
    def __init__(self, num_inputs, num_outputs):
        super(Categorical, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs, bias=False)
        self.ab = AddBias(num_outputs)