def __init__(self, s_dim, a_dim):
    """Create two independent two-layer linear branches, ``pi*`` and ``v*``.

    Args:
        s_dim: Input width of ``pi1`` and ``v1`` (presumably the state size).
        a_dim: Output width of ``pi2`` (presumably the number of discrete
            actions, given the Categorical distribution stored below).
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    pi_hidden = 200
    v_hidden = 100

    # pi branch: s_dim -> 200 -> a_dim
    self.pi1 = nn.Linear(s_dim, pi_hidden)
    self.pi2 = nn.Linear(pi_hidden, a_dim)

    # v branch: s_dim -> 100 -> 1
    self.v1 = nn.Linear(s_dim, v_hidden)
    self.v2 = nn.Linear(v_hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.pi1, self.pi2, self.v1, self.v2]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create the ``a1``/``mu``/``sigma`` branch and the ``c1``/``v`` branch.

    Args:
        s_dim: Input width of ``a1`` and ``c1`` (presumably the state size).
        a_dim: Output width of both ``mu`` and ``sigma`` (presumably the
            dimensionality of a continuous action, given the Normal
            distribution stored below).
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    hidden = 100

    # a1 feeds two equally sized outputs, mu and sigma.
    self.a1 = nn.Linear(s_dim, hidden)
    self.mu = nn.Linear(hidden, a_dim)
    self.sigma = nn.Linear(hidden, a_dim)

    # Separate branch ending in a single output.
    self.c1 = nn.Linear(s_dim, hidden)
    self.v = nn.Linear(hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.a1, self.mu, self.sigma, self.c1, self.v]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Normal
def __init__(self, s_dim, a_dim):
    """Create two 1x1 convolutions plus ``pi*`` and ``v*`` linear branches.

    Args:
        s_dim: Number of output channels of each convolution; also used to
            size the linear inputs (``s_dim * 6 * 6``).
        a_dim: Output width of ``pi2``. Note that it is not stored on the
            instance by this constructor.
    """
    super(Net, self).__init__()
    self.s_dim = s_dim

    # Two identically configured 3 -> s_dim convolutions with a 1x1 kernel.
    self.conv1 = nn.Conv2d(3, s_dim, (1, 1), padding=0)
    self.conv2 = nn.Conv2d(3, s_dim, (1, 1), padding=0)

    # Presumably a flattened 6x6 feature map with s_dim channels -- TODO
    # confirm against forward().
    flat_features = s_dim * 6 * 6
    hidden = 100

    self.pi1 = nn.Linear(flat_features, hidden)
    self.pi2 = nn.Linear(hidden, a_dim)
    self.v1 = nn.Linear(flat_features, hidden)
    self.v2 = nn.Linear(hidden, 1)

    # Only the Linear layers are passed to set_init (defined outside this
    # block); the convolutions are left as constructed.
    linear_layers = [self.pi1, self.pi2, self.v1, self.v2]
    set_init(linear_layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create the actor (``pi1``/``pi2``) and critic (``v1``/``v2``) nets.

    Args:
        s_dim: Input width of both nets (presumably the state size).
        a_dim: Output width of the actor net.
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    hidden = 128

    # Actor net: s_dim -> 128 -> a_dim
    self.pi1 = nn.Linear(s_dim, hidden)
    self.pi2 = nn.Linear(hidden, a_dim)

    # Critic net: s_dim -> 128 -> 1
    self.v1 = nn.Linear(s_dim, hidden)
    self.v2 = nn.Linear(hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.pi1, self.pi2, self.v1, self.v2]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create two independent two-layer linear branches, ``pi*`` and ``v*``.

    Args:
        s_dim: Input width of ``pi1`` and ``v1`` (presumably the state size).
        a_dim: Output width of ``pi2``.
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    hidden = 100

    # The shapes in the trailing comments are the original author's notes;
    # they correspond to s_dim == 4 and a_dim == 2.
    self.pi1 = nn.Linear(s_dim, hidden)  # 4x100
    self.pi2 = nn.Linear(hidden, a_dim)  # 100x2
    self.v1 = nn.Linear(s_dim, hidden)  # 4x100
    self.v2 = nn.Linear(hidden, 1)  # 100x1

    # Author's note: set_init lives in utils.py and sets each layer's weight
    # with mean=0, std=0.1 and its bias to 0.1 -- TODO confirm against
    # utils.py, which is not visible from here.
    layers = [self.pi1, self.pi2, self.v1, self.v2]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, action_d, observation_d):
    """Create the policy and value layers.

    Args:
        action_d: Output width of ``policy_layer_2``. Note that this comes
            first in the signature.
        observation_d: Input width of ``policy_layer_1`` and
            ``value_layer_1``.
    """
    super(Net, self).__init__()
    self.action_d, self.observation_d = action_d, observation_d

    hidden = 256

    # Policy layers: observation_d -> 256 -> action_d
    self.policy_layer_1 = nn.Linear(observation_d, hidden)
    self.policy_layer_2 = nn.Linear(hidden, action_d)

    # Value layers: observation_d -> 256 -> 1
    self.value_layer_1 = nn.Linear(observation_d, hidden)
    self.value_layer_2 = nn.Linear(hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [
        self.policy_layer_1,
        self.policy_layer_2,
        self.value_layer_1,
        self.value_layer_2,
    ]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create the actor net (``a1``/``mu``/``sigma``) and critic net.

    Args:
        s_dim: Input width of ``a1`` and ``c1`` (presumably the state size).
        a_dim: Output width of both ``mu`` and ``sigma``.
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    actor_hidden = 200
    critic_hidden = 100

    # Actor net (stochastic policy): a1 feeds both mu and sigma.
    self.a1 = nn.Linear(s_dim, actor_hidden)
    self.mu = nn.Linear(actor_hidden, a_dim)
    self.sigma = nn.Linear(actor_hidden, a_dim)

    # Critic net: s_dim -> 100 -> 1
    self.c1 = nn.Linear(s_dim, critic_hidden)
    self.v = nn.Linear(critic_hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.a1, self.mu, self.sigma, self.c1, self.v]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Normal
def __init__(self, s_dim, a_dim):
    """Create two stacked linear layers (``b1``, ``b2``) and two output layers.

    Both output layers, ``pi`` and ``v``, take 24 inputs, which is the output
    width of ``b2``.

    Args:
        s_dim: Input width of ``b1`` (presumably the state size).
        a_dim: Output width of ``pi``.
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    first_width = 32
    second_width = 24

    # BatchNorm1d after b1/b2 and a third layer (24 -> 16) are currently
    # disabled.
    self.b1 = nn.Linear(s_dim, first_width)
    self.b2 = nn.Linear(first_width, second_width)

    # Output layers sized to b2's output.
    self.pi = nn.Linear(second_width, a_dim)
    self.v = nn.Linear(second_width, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.b1, self.b2, self.pi, self.v]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create the ``a``/``mu``/``sigma`` branch and the ``c``/``v`` branch.

    Neither ``s_dim`` nor ``a_dim`` is stored on the instance.

    Args:
        s_dim: Input width of ``a`` and ``c`` (presumably the state size).
        a_dim: Output width of both ``mu`` and ``sigma``.
    """
    super(Net, self).__init__()

    a_hidden = 512
    c_hidden = 256

    # `a` feeds two equally sized outputs, mu and sigma.
    self.a = nn.Linear(s_dim, a_hidden)
    self.mu = nn.Linear(a_hidden, a_dim)
    self.sigma = nn.Linear(a_hidden, a_dim)

    # Separate branch ending in a single output.
    self.c = nn.Linear(s_dim, c_hidden)
    self.v = nn.Linear(c_hidden, 1)

    # All layers go through set_init (defined outside this block).
    layers = [self.a, self.mu, self.sigma, self.c, self.v]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Normal
def __init__(self, s_dim, a_dim):
    """Create four-layer ``pi*`` and ``v*`` branches plus a dropout module.

    Every Linear layer is passed to ``set_init`` so the hidden layers
    (``pi12``, ``pi13``, ``v12``, ``v13``) are initialised the same way as
    the first and last layer of each branch, instead of being left as
    constructed.

    Args:
        s_dim: Input width of ``pi1`` and ``v1`` (presumably the state size).
        a_dim: Output width of ``pi2``.
    """
    super(Net, self).__init__()
    self.s_dim = s_dim
    self.a_dim = a_dim

    # pi branch: s_dim -> 128 -> 32 -> 16 -> a_dim
    self.pi1 = nn.Linear(s_dim, 128)
    self.pi12 = nn.Linear(128, 32)
    self.pi13 = nn.Linear(32, 16)
    self.pi2 = nn.Linear(16, a_dim)

    # v branch: s_dim -> 64 -> 16 -> 16 -> 1
    self.v1 = nn.Linear(s_dim, 64)
    self.v12 = nn.Linear(64, 16)
    self.v13 = nn.Linear(16, 16)
    self.v2 = nn.Linear(16, 1)

    self.drop = nn.Dropout(0.5)

    # set_init is defined outside this block. Pass all eight Linear layers,
    # not only the first/last of each branch, so initialisation is uniform.
    set_init([
        self.pi1, self.pi12, self.pi13, self.pi2,
        self.v1, self.v12, self.v13, self.v2,
    ])

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self):
    """Create three-layer actor and critic stacks.

    Layer sizes come from the names ``S_LEN`` (input width) and ``A_DIM``
    (actor output width), both defined outside this block.
    """
    super(Net, self).__init__()

    first_hidden = 200
    second_hidden = 100

    # Actor: S_LEN -> 200 -> 100 -> A_DIM
    self.linear_1_a = nn.Linear(S_LEN, first_hidden)
    self.linear_2_a = nn.Linear(first_hidden, second_hidden)
    self.output_a = nn.Linear(second_hidden, A_DIM)

    # Critic: S_LEN -> 200 -> 100 -> 1
    self.linear_1_c = nn.Linear(S_LEN, first_hidden)
    self.linear_2_c = nn.Linear(first_hidden, second_hidden)
    self.output_c = nn.Linear(second_hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it, actor layers first.
    layers = [
        self.linear_1_a,
        self.linear_2_a,
        self.output_a,
        self.linear_1_c,
        self.linear_2_c,
        self.output_c,
    ]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, action_mappings, action_line_mappings):
    """Create three-layer ``pi*`` and ``v*`` branches, 896 units wide.

    Args:
        s_dim: Input width of ``pi1`` and ``v1`` (presumably the state size).
        action_mappings: Object exposing ``.shape``; its first dimension is
            the output width of ``pi3``. Stored on the instance.
        action_line_mappings: Stored on the instance; not otherwise used by
            this constructor.
    """
    super(Net, self).__init__()
    self.s_dim = s_dim
    self.action_mappings = action_mappings
    self.action_line_mappings = action_line_mappings

    hidden = 896
    n_actions = action_mappings.shape[0]

    # pi branch: s_dim -> 896 -> 896 -> action_mappings.shape[0]
    self.pi1 = nn.Linear(s_dim, hidden)
    self.pi2 = nn.Linear(hidden, hidden)
    self.pi3 = nn.Linear(hidden, n_actions)

    # v branch: s_dim -> 896 -> 896 -> 1
    self.v1 = nn.Linear(s_dim, hidden)
    self.v2 = nn.Linear(hidden, hidden)
    self.v3 = nn.Linear(hidden, 1)

    # set_init is defined outside this block; every layer built above is
    # handed to it.
    layers = [self.pi1, self.pi2, self.pi3, self.v1, self.v2, self.v3]
    set_init(layers)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim):
    """Create four-layer ``al*`` and ``cl*`` stacks of matching widths.

    Args:
        s_dim: Input width of ``al1`` and ``cl1`` (presumably the state
            size).
        a_dim: Output width of ``al4``.
    """
    super(Net, self).__init__()
    self.s_dim, self.a_dim = s_dim, a_dim

    # Hidden widths shared by both stacks.
    w1, w2, w3 = 200, 100, 50

    # al stack (presumably the actor): s_dim -> 200 -> 100 -> 50 -> a_dim
    self.al1 = nn.Linear(s_dim, w1)
    self.al2 = nn.Linear(w1, w2)
    self.al3 = nn.Linear(w2, w3)
    self.al4 = nn.Linear(w3, a_dim)

    # cl stack (presumably the critic): s_dim -> 200 -> 100 -> 50 -> 1
    self.cl1 = nn.Linear(s_dim, w1)
    self.cl2 = nn.Linear(w1, w2)
    self.cl3 = nn.Linear(w2, w3)
    self.cl4 = nn.Linear(w3, 1)

    # set_init is defined outside this block; it receives the al layers
    # followed by the cl layers in one list.
    set_init([
        self.al1, self.al2, self.al3, self.al4,
        self.cl1, self.cl2, self.cl3, self.cl4,
    ])

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical
def __init__(self, s_dim, a_dim, CNN=False):
    """Create either the conv/LSTM variant or the plain linear variant.

    Only the layers that the selected branch actually created are passed to
    ``set_init``. The ``CNN=True`` branch defines no ``v1``/``v2``, so
    passing them to ``set_init`` unconditionally would fail with an
    ``AttributeError`` on the missing attributes.

    Args:
        s_dim: Input width of ``pi1``/``v1`` in the non-CNN variant
            (presumably the state size). Stored on the instance.
        a_dim: Output width of ``pi2`` in the non-CNN variant. Stored on
            the instance.
        CNN: When true, build ``conv1``, ``pi1``, ``pi2`` and ``LSTM``
            instead of the linear ``pi*``/``v*`` pairs.
    """
    super(A3CNet, self).__init__()
    self.s_dim = s_dim
    self.a_dim = a_dim

    if CNN:
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=6,
            kernel_size=3,
            stride=1,
            padding=(1, 1),
        )
        self.pi1 = nn.Linear(1, 32)
        self.pi2 = nn.Linear(32, 32)
        self.LSTM = nn.LSTM(
            input_size=32,
            hidden_size=32,
            num_layers=1,
        )
        # NOTE(review): this variant has no v1/v2 and no layer sized by
        # a_dim -- confirm against forward() whether that is intended.
        layers_to_init = [self.pi1, self.pi2]
    else:
        self.pi1 = nn.Linear(s_dim, 128)
        self.pi2 = nn.Linear(128, a_dim)
        self.v1 = nn.Linear(s_dim, 128)
        self.v2 = nn.Linear(128, 1)
        layers_to_init = [self.pi1, self.pi2, self.v1, self.v2]

    # set_init is defined outside this block.
    set_init(layers_to_init)

    # The distribution *class* is stored, not an instance.
    self.distribution = torch.distributions.Categorical