Example 1
import gym
import torch
import torch.optim as optim

env = gym.make(args.env_name)

num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

# ActorCritic, GRU, and Value are model classes defined elsewhere in the repo.
if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = GRU(num_inputs, num_actions)
    old_policy_net = GRU(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list, batch_actions_list,
                        batch_advantages_list, batch_targets_list):
    # Episodes vary in length, so pad every tensor to the longest episode.
    lengths = [states.size(0) for states in batch_states_list]

    max_length = max(lengths)
    batch_states = torch.zeros(len(batch_states_list), max_length, num_inputs)
    batch_actions = torch.zeros(len(batch_actions_list), max_length,
                                num_actions)
    batch_advantages = torch.zeros(len(batch_advantages_list), max_length)
    batch_mask = []
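
The helper is cut off right after `batch_mask = []`. A minimal sketch of how this padding step is typically finished; the continuation below is an assumption, not the original code:

    # Hypothetical continuation: copy each episode into the padded tensors
    # and record a 0/1 mask marking the valid (non-padded) timesteps.
    batch_targets = torch.zeros(len(batch_targets_list), max_length)
    for i in range(len(batch_states_list)):
        length = lengths[i]
        batch_states[i, :length] = batch_states_list[i]
        batch_actions[i, :length] = batch_actions_list[i]
        batch_advantages[i, :length] = batch_advantages_list[i]
        batch_targets[i, :length] = batch_targets_list[i]
        mask = torch.zeros(max_length)
        mask[:length] = 1.0
        batch_mask.append(mask)
    batch_mask = torch.stack(batch_mask)
    return (batch_states, batch_actions, batch_advantages,
            batch_targets, batch_mask, lengths)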
Example 2
            test_set.append(data_set[t])

        sequence_length = 401
        input_size = 3
        hidden_size = 16                # TODO: this parameter can be further tuned
        num_layers = 1                  # TODO: this parameter can be further tuned
        batch_size = 1
        num_epochs = 20  # 30 for 0.0001, and 20 for 0.0002
        learning_rate = 0.0002

        f = open("log_lr.txt", "a")
        f.write("\nLength: {}, Step: {}\n".format(t_l[ktr1], step[ktr1][ktr2]))
        rnn = GRU(input_size, hidden_size, num_layers, 1)  # output size 1
        rnn.cuda()
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
        losses = []
        ktr_in = 0
        for epoch in range(num_epochs):
            for img, lb in train_set:
                # Wrap in a length-1 batch; np.float32 matches FloatTensor.
                img = np.array([img], dtype=np.float32)
                img = torch.FloatTensor(img)
                img = Variable(img).cuda()

                # Forward + Backward + Optimize
                optimizer.zero_grad()
                output = rnn(img)
                crt = torch.FloatTensor(np.array([lb], dtype=np.float32))
                crt = Variable(crt).cuda()
                loss = criterion(output, crt)
                loss.backward()
                optimizer.step()
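
Neither example shows the `GRU` class being instantiated here. A minimal sketch of a wrapper matching the constructor signature used above, `GRU(input_size, hidden_size, num_layers, output_size)`, assuming an `nn.GRU` backbone with a linear readout of the last timestep (a reconstruction, not the original class):

import torch.nn as nn

class GRU(nn.Module):
    # Hypothetical reconstruction of the wrapper used in these examples.
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers,
                          batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        out, _ = self.gru(x)
        # Map the final timestep's hidden state to the output size.
        return self.fc(out[:, -1, :])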
Example 3
env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    # `dtype` (e.g. torch.cuda.FloatTensor) is assumed to be defined earlier.
    policy_net = GRU(num_inputs, num_actions, dtype=dtype).type(dtype)
    old_policy_net = GRU(num_inputs, num_actions, dtype=dtype).type(dtype)
    value_net = Value(num_inputs).type(dtype)
    reward_net = GRU(num_inputs + num_actions,
                     1,
                     policy_flag=0,
                     activation_flag=2,
                     dtype=dtype).type(dtype)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
    opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list,
                        batch_actions_list,
                        batch_advantages_list=None):
    # Episodes vary in length, so pad every tensor to the longest episode.
    lengths = [states.size(0) for states in batch_states_list]

    max_length = max(lengths)
    batch_states = torch.zeros(len(batch_states_list), max_length,
                               num_inputs).type(dtype)
    batch_actions = torch.zeros(len(batch_actions_list), max_length,
                                num_actions).type(dtype)
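
Both padding helpers build the zero-padded batch by hand. PyTorch's `torch.nn.utils.rnn.pad_sequence` does the same job in one call; a small self-contained sketch (the shapes are illustrative):

import torch
from torch.nn.utils.rnn import pad_sequence

# Three episodes of different lengths, each of shape (T_i, num_inputs).
episodes = [torch.randn(5, 4), torch.randn(3, 4), torch.randn(7, 4)]
lengths = torch.tensor([e.size(0) for e in episodes])

# (batch, max_length, num_inputs), zero-padded like the manual loops above.
batch_states = pad_sequence(episodes, batch_first=True)

# 0/1 mask of valid timesteps, analogous to batch_mask.
mask = (torch.arange(batch_states.size(1))[None, :] < lengths[:, None]).float()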
Example 4
        test_set.append(all_discharge_data[i])

# Each shape is (hidden_size, num_layers, indices into `strategies` to try).
shapes = [(16, 1, (2, 3)), (32, 1, (1, 2, 3)), (64, 1, (1, 2, 3)),
          (32, 2, (1, 2, 3)), (64, 2, (1, 2, 3))]
# Each strategy is (batch_size, learning_rate, num_epochs).
strategies = [(1, 0.0001, 40), (2, 0.00015, 30), (4, 0.0002, 24),
              (8, 0.0003, 18)]

f = open("log_gru.txt", "a")

for shape in shapes:
    for option in shape[2]:
        strategy = strategies[option]
        # input_size=3; hidden size and layer count come from `shape`.
        rnn = GRU(3, shape[0], shape[1], 1)
        rnn.cuda()
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(rnn.parameters(), lr=strategy[1])
        for epoch in range(strategy[2]):
            for i in range(len(train_set) // strategy[0]):
                img = []
                lb = []
                for j in range(strategy[0]):
                    img.append(
                        np.array(train_set[i * strategy[0] + j].data,
                                 dtype=np.float32).transpose())
                    lb.append(
                        np.array(train_set[i * strategy[0] + j].SOH,
                                 dtype=np.float32))
                img = np.array(img)
                img = torch.FloatTensor(img)
                img = Variable(img).cuda()
                lb = np.array(lb)
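
Example 4 stops after collecting the label batch. A plausible continuation, mirroring the "Forward + Backward + Optimize" step of Example 2 (assumed, not taken from the original source):

                lb = torch.FloatTensor(lb)
                lb = Variable(lb).cuda()

                # Forward + Backward + Optimize (assumed continuation).
                optimizer.zero_grad()
                output = rnn(img)
                loss = criterion(output, lb)
                loss.backward()
                optimizer.step()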