def __init__(self, num_inputs, num_outputs):
    """Diagonal Gaussian action head with extra learnable bias parameters.

    Besides the usual mean layer and per-output log-std, this variant owns
    a per-input log-std (`synlogstd`) and a scalar probability parameter
    (`opto_probs`) — their semantics depend on the forward pass (not shown
    here).
    """
    super(DiagGaussian, self).__init__()

    def init_layer(m):
        # normc weight init, biases zeroed.
        return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

    self.fc_mean = init_layer(nn.Linear(num_inputs, num_outputs, bias=True))
    # State-independent log-std per output, starting at zero.
    self.logstd = AddBias(torch.zeros(num_outputs))
    # Per-input log-std, also starting at zero.
    self.synlogstd = AddBias(torch.zeros(num_inputs))
    # Single learnable scalar, initialized to 0.01.
    self.opto_probs = AddBias(torch.ones(1) * 0.01)
    self.syn_probs = 1.0
def __init__(self, num_inputs, num_outputs):
    """Gaussian policy head: linear mean layer plus learnable log-std.

    Also allocates the per-rollout bookkeeping lists filled in elsewhere.
    """
    super(GaussianPolicy, self).__init__()
    # NOTE: `orthongonal_init_` is the (misspelled) project helper name.
    mean_layer = nn.Linear(num_inputs, num_outputs)
    self.fc_mean = orthongonal_init_(mean_layer)
    self.logstd = AddBias(torch.zeros(num_outputs))
    # Buffers populated during rollouts by the caller.
    self.saved_log_probs = []
    self.rewards = []
def __init__(self, num_inputs, num_outputs):
    """Diagonal Gaussian head: linear mean plus state-independent log-std."""
    super(DiagGaussian, self).__init__()

    def init_layer(m):
        # normc weight init, bias zeroed.
        return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

    self.fc_mean = init_layer(nn.Linear(num_inputs, num_outputs))
    self.logstd = AddBias(torch.zeros(num_outputs))
def __init__(self, num_inputs, action_space):
    """Actor-critic MLP for continuous control.

    Two 64-unit hidden layers for each of the actor and critic. Linear
    layers are bias-free; biases live in separate AddBias modules.
    Observations are normalized through ObsNorm.

    Args:
        num_inputs: observation dimensionality.
        action_space: gym-style Box space; shape[0] is the action dim.
    """
    super(MLPPolicy, self).__init__()
    self.obs_filter = ObsNorm((1, num_inputs), clip=5)
    self.action_space = action_space

    # Actor trunk.
    self.a_fc1 = nn.Linear(num_inputs, 64, bias=False)
    self.a_ab1 = AddBias(64)
    self.a_fc2 = nn.Linear(64, 64, bias=False)
    self.a_ab2 = AddBias(64)
    self.a_fc_mean = nn.Linear(64, action_space.shape[0], bias=False)
    self.a_ab_mean = AddBias(action_space.shape[0])
    self.a_ab_logstd = AddBias(action_space.shape[0])

    # Critic trunk.
    self.v_fc1 = nn.Linear(num_inputs, 64, bias=False)
    self.v_ab1 = AddBias(64)
    self.v_fc2 = nn.Linear(64, 64, bias=False)
    self.v_ab2 = AddBias(64)
    self.v_fc3 = nn.Linear(64, 1, bias=False)
    self.v_ab3 = AddBias(1)

    self.apply(weights_init_mlp)
    # NOTE(review): the tanh-gain rescaling of the hidden layers was
    # disabled (all uses were commented out), so the unused
    # calculate_gain('tanh') call was removed as dead code.
    # Small initial mean weights keep initial actions near zero.
    self.a_fc_mean.weight.data.mul_(0.01)
    self.train()
def __init__(self, state_shape, action_dim, lstm_size, use_cuda,
             use_lstm, name=''):
    """MLP base with two parallel input encoders and an optional LSTM core.

    The encoders consume inputs of size state_shape[3] and state_shape[4];
    hidden2 maps 1024 -> 512, so the two 512-dim encodings are presumably
    concatenated in the forward pass — confirm against forward().
    """
    super(MLPBase, self).__init__()

    def init_layer(m):
        # normc weight init, bias zeroed.
        return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

    self.state_shape = state_shape
    self.action_dim = action_dim
    self.lstm_size = lstm_size
    self.use_cuda = use_cuda
    self.use_lstm = use_lstm
    self.stddev = 1

    self.hidden11 = init_layer(nn.Linear(self.state_shape[3], 512))
    self.hidden12 = init_layer(nn.Linear(self.state_shape[4], 512))
    self.hidden2 = init_layer(nn.Linear(1024, 512))

    # Heads read from the LSTM state when enabled, else from the trunk.
    if self.use_lstm:
        self.lstm = nn.LSTMCell(512, self.lstm_size)
        head_in = self.lstm_size
    else:
        head_in = 512
    self.action_head = init_layer(nn.Linear(head_in, self.action_dim))
    self.action_sigma = init_layer(nn.Linear(head_in, self.action_dim))
    self.value_head = init_layer(nn.Linear(head_in, 1))
    if self.use_lstm:
        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

    self.logstd = AddBias(torch.zeros(action_dim))
def __init__(self, num_inputs, action_space):
    """CNN actor-critic for discrete action spaces.

    All conv/linear layers are bias-free; biases are held by separate
    AddBias modules. Layers feeding ReLUs are rescaled by the ReLU gain
    after the generic weight init.
    """
    super(CNNPolicy, self).__init__()

    # Convolutional trunk.
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4, bias=False)
    self.ab1 = AddBias(32)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2, bias=False)
    self.ab2 = AddBias(64)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1, bias=False)
    self.ab3 = AddBias(32)

    # Fully-connected trunk and heads.
    self.linear1 = nn.Linear(32 * 7 * 7, 512, bias=False)
    self.ab_fc1 = AddBias(512)
    self.critic_linear = nn.Linear(512, 1, bias=False)
    self.ab_fc2 = AddBias(1)
    num_outputs = action_space.n
    self.actor_linear = nn.Linear(512, num_outputs, bias=False)
    self.ab_fc3 = AddBias(num_outputs)

    self.apply(weights_init)
    gain = nn.init.calculate_gain('relu')
    for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
        layer.weight.data.mul_(gain)
    self.train()
def __init__(self, num_inputs, action_space):
    """Actor-critic MLP with a distribution head chosen from the space type.

    Discrete spaces get a Categorical head, Box spaces a DiagGaussian head;
    both read from the 64-unit actor trunk. Linear layers are bias-free,
    with biases in separate AddBias modules.

    Args:
        num_inputs: observation dimensionality.
        action_space: gym-style space; only Discrete and Box are supported.

    Raises:
        NotImplementedError: for any other action-space type.
    """
    super(MLPPolicy, self).__init__()
    self.obs_filter = ObsNorm((1, num_inputs), clip=5)
    self.action_space = action_space

    # Actor trunk.
    self.a_fc1 = nn.Linear(num_inputs, 64, bias=False)
    self.a_ab1 = AddBias(64)
    self.a_fc2 = nn.Linear(64, 64, bias=False)
    self.a_ab2 = AddBias(64)

    # Critic trunk.
    self.v_fc1 = nn.Linear(num_inputs, 64, bias=False)
    self.v_ab1 = AddBias(64)
    self.v_fc2 = nn.Linear(64, 64, bias=False)
    self.v_ab2 = AddBias(64)
    self.v_fc3 = nn.Linear(64, 1, bias=False)
    self.v_ab3 = AddBias(1)

    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(64, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(64, num_outputs)
    else:
        raise NotImplementedError

    self.apply(weights_init_mlp)
    # NOTE(review): the tanh-gain rescaling was disabled (all uses were
    # commented out), so the unused calculate_gain('tanh') call was
    # removed as dead code.
    if action_space.__class__.__name__ == "Box":
        # Small initial mean weights keep initial actions near zero.
        self.dist.fc_mean.weight.data.mul_(0.01)
    self.train()
def __init__(self, num_inputs, num_outputs, zero_init=False):
    """Diagonal Gaussian head with optional all-zero mean initialization.

    Args:
        num_inputs: input feature dimension of the mean layer.
        num_outputs: action dimension.
        zero_init: if True, start the mean layer (weights and bias) at
            exactly zero; otherwise apply the normc init with zero bias.
    """
    super(DiagGaussian, self).__init__()
    mean = nn.Linear(num_inputs, num_outputs)
    if zero_init:
        mean.weight.data.fill_(0.0)
        mean.bias.data.fill_(0.0)
    else:
        mean = init(mean, init_normc_, lambda x: nn.init.constant_(x, 0))
    self.fc_mean = mean
    self.logstd = AddBias(torch.zeros(num_outputs))
def __init__(self, num_inputs, num_outputs, num_agents, sigmoid):
    """Per-agent diagonal Gaussian head shared across homogeneous agents.

    Total input/output dims are split evenly across the agents; one shared
    Linear maps each agent's input slice to its output slice.

    Args:
        num_inputs: total input dimension across all agents.
        num_outputs: total output dimension across all agents.
        num_agents: number of agents; must evenly divide both dims.
        sigmoid: flag stored for use by the forward pass.

    Raises:
        ValueError: if num_inputs or num_outputs is not divisible by
            num_agents (previously the ``//`` silently truncated).
    """
    super(DiagGaussian, self).__init__()
    if num_inputs % num_agents or num_outputs % num_agents:
        raise ValueError(
            "num_inputs and num_outputs must be divisible by num_agents")

    # normc weight init with zero bias.
    init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
        x, 0))

    self.num_agents = num_agents
    self.atom_num_inputs = num_inputs // num_agents
    self.atom_num_outputs = num_outputs // num_agents
    self.fc_mean = init_(
        nn.Linear(self.atom_num_inputs, self.atom_num_outputs))
    self.logstd = AddBias(torch.zeros(self.atom_num_outputs))
    self.sigmoid = sigmoid
def __init__(self, num_inputs, action_space):
    """CNN actor-critic whose action head is picked from the space type.

    Discrete spaces get a Categorical head, Box spaces a DiagGaussian head.
    Conv/linear layers are bias-free with separate AddBias modules, and
    ReLU-fed layers are rescaled by the ReLU gain after the generic init.

    Raises:
        NotImplementedError: for unsupported action-space types.
    """
    super(CNNPolicy, self).__init__()

    # Convolutional trunk.
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4, bias=False)
    self.ab1 = AddBias(32)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2, bias=False)
    self.ab2 = AddBias(64)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1, bias=False)
    self.ab3 = AddBias(32)

    self.linear1 = nn.Linear(32 * 7 * 7, 512, bias=False)
    self.ab_fc1 = AddBias(512)
    self.critic_linear = nn.Linear(512, 1, bias=False)
    self.ab_fc2 = AddBias(1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(512, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(512, action_space.shape[0])
    else:
        raise NotImplementedError

    self.apply(weights_init)
    gain = nn.init.calculate_gain('relu')
    for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
        layer.weight.data.mul_(gain)
    if space_kind == "Box":
        # Small initial mean weights keep initial actions near zero.
        self.dist.fc_mean.weight.data.mul_(0.01)
    self.train()
def __init__(self, state_shape, action_dim, lstm_size, use_cuda,
             use_lstm, name=''):
    """MLP base with a single input encoder and an optional LSTM core.

    The encoder consumes a state_shape[1] + state_shape[2] sized input;
    policy mean/sigma and value heads read from the LSTM state when
    enabled, else directly from the 512-unit trunk.
    """
    super(MLPBase, self).__init__()

    def init_layer(m):
        # normc weight init, bias zeroed.
        return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

    self.state_shape = state_shape
    self.action_dim = action_dim
    self.lstm_size = lstm_size
    self.use_cuda = use_cuda
    self.use_lstm = use_lstm
    self.stddev = 1

    self.hidden1 = init_layer(
        nn.Linear(self.state_shape[1] + self.state_shape[2], 512))
    self.hidden2 = init_layer(nn.Linear(512, 512))

    if self.use_lstm:
        self.lstm = nn.LSTMCell(512, self.lstm_size)
        head_in = self.lstm_size
    else:
        head_in = 512
    self.action_head = init_layer(nn.Linear(head_in, self.action_dim))
    self.action_sigma = init_layer(nn.Linear(head_in, self.action_dim))
    self.value_head = init_layer(nn.Linear(head_in, 1))
    if self.use_lstm:
        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

    self.logstd = AddBias(torch.zeros(action_dim))
def __init__(self, num_inputs, num_outputs):
    """Minimal diagonal Gaussian head.

    A default-initialized linear layer produces the mean; a learnable,
    state-independent log-std starts at zero.
    """
    super(DiagGaussian, self).__init__()
    mean_layer = nn.Linear(num_inputs, num_outputs)
    log_std = AddBias(torch.zeros(num_outputs))
    self.fc_mean = mean_layer
    self.logstd = log_std
def __init__(self, num_inputs, num_outputs):
    """Diagonal Gaussian head (e.g. 64 inputs -> 2 outputs).

    fc_mean is the actor's final layer; logstd is a learnable,
    state-independent per-output log standard deviation.
    """
    super(DiagGaussian, self).__init__()
    # Final actor layer producing the action mean.
    self.fc_mean = nn.Linear(num_inputs, num_outputs)
    self.logstd = AddBias(torch.zeros(num_outputs))
def __init__(self, module):
    """Wrap a module so its bias is applied by a separate AddBias module.

    The wrapped module's current bias values seed the AddBias, then the
    module's own bias is detached (set to None) so the bias is applied
    exactly once — by the AddBias.
    """
    super(SplitBias, self).__init__()
    bias_data = module.bias.data  # capture BEFORE clearing the bias below
    self.module = module
    self.add_bias = AddBias(bias_data)
    self.module.bias = None
def __init__(self, num_outputs):
    """Log-std-only Gaussian head: no mean layer (the mean is supplied by
    the caller), just a learnable per-output log-std starting at zero."""
    super(DiagGaussianNoFC, self).__init__()
    self.logstd = AddBias(torch.zeros(num_outputs))
def __init__(self, num_inputs, num_outputs):
    """Diagonal Gaussian head with biases split into AddBias modules.

    Fix: the mean layer previously hard-coded its input size to 64 and
    ignored ``num_inputs``; it now honors the parameter (identical
    behavior for the 64-input case the hard-coding implied).

    Args:
        num_inputs: input feature dimension of the mean layer.
        num_outputs: action dimension.
    """
    super(DiagGaussian, self).__init__()
    # Bias-free mean layer; its bias lives in ab_mean.
    self.fc_mean = nn.Linear(num_inputs, num_outputs, bias=False)
    self.ab_mean = AddBias(num_outputs)
    # Learnable state-independent log-std.
    self.ab_logstd = AddBias(num_outputs)
def __init__(self, num_inputs, num_outputs):
    """Categorical (discrete) action head.

    A bias-free linear layer produces the logits; its bias is held by a
    separate AddBias module.
    """
    super(Categorical, self).__init__()
    logits_layer = nn.Linear(num_inputs, num_outputs, bias=False)
    self.linear = logits_layer
    self.ab = AddBias(num_outputs)