env = gym.make(args.env_name)
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = GRU(num_inputs, num_actions)
    old_policy_net = GRU(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list, batch_actions_list,
                        batch_advantages_list, batch_targets_list):
    lengths = []
    for states in batch_states_list:
        lengths.append(states.size(0))
    max_length = max(lengths)

    batch_states = torch.zeros(len(batch_states_list), max_length, num_inputs)
    batch_actions = torch.zeros(len(batch_actions_list), max_length, num_actions)
    batch_advantages = torch.zeros(len(batch_advantages_list), max_length)
    batch_mask = []
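    # Hedged sketch (not in the original listing) of how create_batch_inputs plausibly
    # continues: copy each variable-length episode into the padded tensors and build a
    # 0/1 mask over valid timesteps. The batch_targets allocation and the return
    # signature below are assumptions.
    batch_targets = torch.zeros(len(batch_targets_list), max_length)
    for i, length in enumerate(lengths):
        batch_states[i, :length] = batch_states_list[i]
        batch_actions[i, :length] = batch_actions_list[i]
        batch_advantages[i, :length] = batch_advantages_list[i]
        batch_targets[i, :length] = batch_targets_list[i]
        mask = torch.zeros(max_length)
        mask[:length] = 1.0
        batch_mask.append(mask)
    batch_mask = torch.stack(batch_mask)
    return (batch_states, batch_actions, batch_advantages,
            batch_targets, batch_mask, lengths)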
test_set.append(data_set[t])

sequence_length = 401
input_size = 3
hidden_size = 16   # TODO: this parameter can be further tuned
num_layers = 1     # TODO: this parameter can be further tuned
batch_size = 1
num_epochs = 20    # 30 for lr=0.0001, and 20 for lr=0.0002
learning_rate = 0.0002

f = open("log_lr.txt", "a")
f.write("\nLength: {}, Step: {}\n".format(t_l[ktr1], step[ktr1][ktr2]))

rnn = GRU(input_size, hidden_size, num_layers, 1)
rnn.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

losses = []
ktr_in = 0
for epoch in range(num_epochs):
    for img, lb in train_set:
        img = np.array([img], dtype=np.float64)  # np.float is deprecated; use np.float64
        img = torch.FloatTensor(img)
        img = Variable(img).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        output = rnn(img)
        crt = torch.FloatTensor(np.array([lb], dtype=np.float64))
        crt = Variable(crt).cuda()
        loss = criterion(output, crt)
        loss.backward()
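        # Hedged sketch of how this training step plausibly finishes -- the original
        # listing is cut off after loss.backward(). The loss bookkeeping below is an
        # assumption, not part of the original code.
        optimizer.step()
        losses.append(loss.item())
        ktr_in += 1
    # Per-epoch progress line (format assumed) appended to the log file opened above.
    f.write("Epoch {}: mean loss {:.6f}\n".format(
        epoch + 1, float(np.mean(losses[-len(train_set):]))))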
env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = GRU(num_inputs, num_actions, dtype=dtype).type(dtype)
    old_policy_net = GRU(num_inputs, num_actions, dtype=dtype).type(dtype)
    value_net = Value(num_inputs).type(dtype)
    reward_net = GRU(num_inputs + num_actions, 1, policy_flag=0,
                     activation_flag=2, dtype=dtype).type(dtype)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
    opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list, batch_actions_list, batch_advantages_list=None):
    lengths = []
    for states in batch_states_list:
        lengths.append(states.size(0))
    max_length = max(lengths)

    batch_states = torch.zeros(len(batch_states_list), max_length, num_inputs).type(dtype)
    batch_actions = torch.zeros(len(batch_actions_list), max_length,
                                num_actions).type(dtype)
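# Hedged usage sketch (not in the original listing): reward_net was constructed with
# num_inputs + num_actions input features, which suggests it scores state-action
# sequences formed by concatenating the padded state and action tensors along the
# feature dimension. `padded_states` and `padded_actions` are hypothetical names for
# tensors shaped (batch, max_length, num_inputs) and (batch, max_length, num_actions).
reward_input = torch.cat([padded_states, padded_actions], dim=2)
predicted_rewards = reward_net(reward_input)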
test_set.append(all_discharge_data[i])

# (hidden_size, num_layers, applicable strategy indices)
shapes = [(16, 1, (2, 3)), (32, 1, (1, 2, 3)), (64, 1, (1, 2, 3)),
          (32, 2, (1, 2, 3)), (64, 2, (1, 2, 3))]
# (batch_size, learning_rate, num_epochs)
strategies = [(1, 0.0001, 40), (2, 0.00015, 30), (4, 0.0002, 24), (8, 0.0003, 18)]

f = open("log_gru.txt", "a")
for shape in shapes:
    for option in shape[2]:
        strategy = strategies[option]
        rnn = GRU(3, shape[0], shape[1], 1)
        rnn.cuda()
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(rnn.parameters(), lr=strategy[1])
        for epoch in range(strategy[2]):
            for i in range(int(len(train_set) / strategy[0])):
                img = []
                lb = []
                for j in range(strategy[0]):
                    img.append(np.array(train_set[i * strategy[0] + j].data,
                                        dtype=np.float64).transpose())
                    lb.append(np.array(train_set[i * strategy[0] + j].SOH,
                                       dtype=np.float64))
                img = np.array(img)
                img = torch.FloatTensor(img)
                img = Variable(img).cuda()
                lb = np.array(lb)
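                # Hedged sketch of the rest of the mini-batch step -- the original
                # listing is cut off here; the remaining statements and the log-line
                # format below are assumptions, not part of the original code.
                lb = Variable(torch.FloatTensor(lb)).cuda()
                optimizer.zero_grad()
                output = rnn(img)
                loss = criterion(output, lb)
                loss.backward()
                optimizer.step()
        # Record which (hidden_size, num_layers, batch_size, lr, epochs) combination
        # was just trained, mirroring the log file opened above.
        f.write("hidden={}, layers={}, batch={}, lr={}, epochs={}\n".format(
            shape[0], shape[1], strategy[0], strategy[1], strategy[2]))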