def __init__(self, s_dim, a_dim, bound, hidden, device, lr, memory_len,
             batch_size, update_epoch, gamma, lambda_, epsilon):
    # Parameter initialization
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.bound = bound
    self.hidden = hidden
    self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
    self.lr = lr
    self.memory_len = memory_len
    self.batch_size = batch_size
    self.update_epoch = update_epoch
    self.gamma = gamma
    self.lambda_ = lambda_
    self.epsilon = epsilon
    # network initialization
    self.actor = Actor(s_dim, a_dim, hidden).to(self.device)
    self.actor_old = Actor(s_dim, a_dim, hidden).to(self.device)
    self.actor_old.load_state_dict(self.actor.state_dict())
    self.actor_opt = torch.optim.Adam(self.actor.parameters(), lr=self.lr)
    self.critic = Critic(s_dim).to(self.device)
    self.critic_opt = torch.optim.Adam(self.critic.parameters(), lr=self.lr)
    # memory initialization
    self.memory_s, self.memory_a, self.memory_s_, self.memory_r, self.memory_done = [], [], [], [], []
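
# The __init__ above stores a clip range epsilon, an old-policy copy, and the
# GAE parameters (gamma, lambda_) -- the usual PPO-clip setup. Below is a
# minimal sketch of the matching clipped surrogate loss; it works on generic
# tensors of log-probabilities and advantages rather than this repo's actual
# Actor interface, which is not shown here.
import torch

def ppo_clip_loss(log_prob_new, log_prob_old, advantage, epsilon=0.2):
    """Clipped surrogate: L = -E[min(r*A, clip(r, 1-eps, 1+eps)*A)]."""
    ratio = torch.exp(log_prob_new - log_prob_old)  # importance ratio r_t
    surrogate1 = ratio * advantage                  # unclipped term
    surrogate2 = torch.clamp(ratio, 1 - epsilon, 1 + epsilon) * advantage
    return -torch.min(surrogate1, surrogate2).mean()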
def __init__(self, state_size, action_size, random_seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                       weight_decay=WEIGHT_DECAY)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
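
# This agent keeps local/target pairs for both networks, which are normally
# tracked with a Polyak ("soft") update. A minimal standalone sketch is below;
# the default tau value is an assumption, not taken from this file.
import torch

def soft_update(local_model: torch.nn.Module, target_model: torch.nn.Module, tau: float = 1e-3):
    """theta_target <- tau * theta_local + (1 - tau) * theta_target."""
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)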
def __init__(self, s_dim, a_dim, device, hidden, capacity, batch_size,
             lr_actor, lr_critic, variance_start, variance_decay, variance_min,
             gamma, tau):
    # Parameter Initialization
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.device = device
    self.hidden = hidden
    self.lr_actor = lr_actor
    self.lr_critic = lr_critic
    self.capacity = capacity
    self.batch_size = batch_size
    self.var = variance_start
    self.var_decay = variance_decay
    self.var_min = variance_min
    self.gamma = gamma
    self.tau = tau
    # Network
    self.actor = Actor(s_dim, hidden, a_dim).to(device)
    self.actor_target = Actor(s_dim, hidden, a_dim).to(device)
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic = Critic(s_dim, a_dim, hidden).to(device)
    self.critic_target = Critic(s_dim, a_dim, hidden).to(device)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)
    # replay buffer, or memory
    self.memory = ReplayBuffer(capacity, batch_size, device)
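
# The variance_start / variance_decay / variance_min triple above suggests
# Gaussian exploration noise whose scale is annealed during training. A
# minimal sketch, assuming a deterministic CPU actor that maps a state
# tensor to an action tensor and an action bound of [-1, 1]:
import numpy as np
import torch

def choose_action_with_noise(actor, state, var, var_decay=0.9995, var_min=0.1):
    with torch.no_grad():
        action = actor(torch.as_tensor(state, dtype=torch.float32)).numpy()
    action = np.clip(np.random.normal(action, var), -1.0, 1.0)  # exploration noise
    var = max(var * var_decay, var_min)                         # anneal the scale
    return action, var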
def __init__(self, state_space, action_space):
    self.actor = Actor(state_space, action_space).to(device)
    self.critic = Critic(state_space, action_space).to(device)
    self.actor_target = Actor(state_space, action_space).to(device)
    self.critic_target = Critic(state_space, action_space).to(device)
    # sync the target networks with the online networks at start-up
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.critic_target.load_state_dict(self.critic.state_dict())
    # bug fix: the optimisers must reference self.actor / self.critic,
    # not the undefined names `actor` / `critic`
    self.actor_optimiser = optim.Adam(self.actor.parameters(), lr=1e-3)
    self.critic_optimiser = optim.Adam(self.critic.parameters(), lr=1e-3)
    self.mem = ReplayBuffer(buffer_size)
def __init__(self,
             device,
             agent_num,
             state_shape_n,
             action_shape_n,
             gamma,
             tau,
             max_grad_norm,
             hidden,
             lr_a,
             lr_c,
             buffer_capacity,
             batch_size
             ):
    # hyper parameters
    self.device = device
    self.agent_num = agent_num
    self.state_shape_n = state_shape_n
    self.action_shape_n = action_shape_n
    self.gamma = gamma
    self.tau = tau
    self.max_grad_norm = max_grad_norm
    # define all the actor/critic networks
    self.actors = [None for _ in range(self.agent_num)]
    self.critics = [None for _ in range(self.agent_num)]
    self.actors_target = [None for _ in range(self.agent_num)]
    self.critics_target = [None for _ in range(self.agent_num)]
    self.optimizers_a = [None for _ in range(self.agent_num)]
    self.optimizers_c = [None for _ in range(self.agent_num)]
    for i in range(self.agent_num):
        # define the actor for the i-th agent
        self.actors[i] = Actor(state_n=state_shape_n[i],
                               action_n=action_shape_n[i],
                               hidden=hidden).to(self.device)
        self.actors_target[i] = Actor(state_n=state_shape_n[i],
                                      action_n=action_shape_n[i],
                                      hidden=hidden).to(self.device)
        self.actors_target[i].load_state_dict(self.actors[i].state_dict())
        self.optimizers_a[i] = Adam(self.actors[i].parameters(), lr_a)
        # define the critic for the i-th agent (centralized: sees all agents)
        self.critics[i] = Critic(state_n=sum(state_shape_n),
                                 action_n=sum(action_shape_n),
                                 hidden=hidden).to(self.device)
        self.critics_target[i] = Critic(state_n=sum(state_shape_n),
                                        action_n=sum(action_shape_n),
                                        hidden=hidden).to(self.device)
        self.critics_target[i].load_state_dict(self.critics[i].state_dict())
        self.optimizers_c[i] = Adam(self.critics[i].parameters(), lr_c)
    # define the memory
    self.batch_size = batch_size
    self.memory = ReplayBuffer(capacity=buffer_capacity, batch_size=batch_size)
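
# Each critic above takes sum(state_shape_n) + sum(action_shape_n) inputs,
# i.e. the joint observation and joint action of all agents -- the MADDPG
# centralized-critic idea. A minimal sketch of how per-agent batch tensors
# would be concatenated before a critic call; the shapes are assumptions:
import torch

def joint_critic_input(states_n, actions_n):
    """states_n / actions_n: lists of (batch, dim_i) tensors, one per agent."""
    joint_state = torch.cat(states_n, dim=1)    # (batch, sum of state dims)
    joint_action = torch.cat(actions_n, dim=1)  # (batch, sum of action dims)
    return joint_state, joint_action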
def __init__(
        self,
        s_dim,
        a_num,
        device,
        hidden,
        lr_actor,
        lr_critic,
        memory_len,
        gamma,
        lambda_,
):
    # Parameter Initialization
    self.s_dim = s_dim
    self.a_num = a_num
    self.device = device
    self.hidden = hidden
    self.lr_actor = lr_actor
    self.lr_critic = lr_critic
    self.memory_len = memory_len
    self.gamma = gamma
    self.lambda_ = lambda_
    # network initialization
    self.actor = Actor(s_dim, hidden, a_num).to(self.device)
    self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic = Critic(s_dim, hidden).to(self.device)
    self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)
    # on-policy rollout storage (cleared after each update), not a
    # persistent replay buffer
    self.memory_s = []
    self.memory_a = []
    self.memory_s_ = []
    self.memory_r = []
    self.memory_done = []
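
# gamma and lambda_ above are the usual GAE(lambda) parameters. A minimal
# standalone sketch of generalized advantage estimation over one rollout;
# the value estimates v and v_next are assumed inputs rather than this
# repo's Critic outputs:
import torch

def gae_advantages(rewards, dones, v, v_next, gamma=0.99, lambda_=0.95):
    """rewards, dones, v, v_next: 1-D tensors of equal length T."""
    advantages = torch.zeros_like(rewards)
    gae = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * v_next[t] * (1 - dones[t]) - v[t]
        gae = delta + gamma * lambda_ * (1 - dones[t]) * gae
        advantages[t] = gae
    return advantages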
def __init__(self, s_dim, a_dim, capacity, batch_size, lr_actor, lr_critic,
             hidden, var_init, var_decay, var_min, gamma, tau,
             policy_noise, noise_clip, policy_freq):
    # Parameter Initialization
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.lr_actor = lr_actor
    self.lr_critic = lr_critic
    self.hidden = hidden
    self.capacity = capacity
    self.batch_size = batch_size
    self.var = var_init
    self.var_decay = var_decay
    self.var_min = var_min
    self.gamma = gamma
    self.tau = tau
    self.policy_noise = policy_noise
    self.noise_clip = noise_clip
    self.policy_freq = policy_freq
    self.train_it = 0
    # Network
    self.actor = Actor(s_dim, a_dim, hidden)
    self.actor_target = copy.deepcopy(self.actor)
    self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic = Critic(s_dim, a_dim, hidden)
    self.critic_target = copy.deepcopy(self.critic)
    self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)
    # replay buffer, or memory
    self.memory = ReplayBuffer(capacity, batch_size)
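
# policy_noise / noise_clip / policy_freq above are TD3's signature tricks:
# target policy smoothing, clipped noise, and delayed actor updates (gated
# on train_it % policy_freq). A minimal sketch of the smoothed target
# action; the actor_target call and the [-1, 1] action bound are assumptions:
import torch

def smoothed_target_action(actor_target, next_state, policy_noise=0.2, noise_clip=0.5):
    action = actor_target(next_state)
    noise = (torch.randn_like(action) * policy_noise).clamp(-noise_clip, noise_clip)
    return (action + noise).clamp(-1.0, 1.0)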
def __init__(
        self,
        s_dim,
        a_num,
        device,
        hidden,
        lr_actor,
        lr_critic,
        max_len,
        gamma,
):
    # Parameter Initialization
    self.s_dim = s_dim
    self.a_num = a_num
    self.device = device
    self.hidden = hidden
    self.lr_actor = lr_actor
    self.lr_critic = lr_critic
    self.max_len = max_len
    self.gamma = gamma
    # network initialization
    self.actor = Actor(s_dim, hidden, a_num).to(self.device)
    self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic = Critic(s_dim, hidden).to(self.device)
    self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)
    # define memory
    self.memory_s = []
    self.memory_a = []
    self.memory_r = []
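
# With only (s, a, r) stored per episode and a single gamma, this agent looks
# like a Monte-Carlo policy-gradient method with a learned baseline. A minimal
# sketch of the reward-to-go computation those memory lists would feed:
def discounted_returns(rewards, gamma=0.99):
    """rewards: list of per-step rewards for one episode."""
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.insert(0, g)  # prepend so returns[t] matches rewards[t]
    return returns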
def __init__(self, s_dim, a_dim, hidden, capacity, batch_size, lr,
             gamma, tau, log_prob_reg):
    # Parameter Initialization
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.hidden = hidden
    self.lr = lr
    self.capacity = capacity
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau
    self.log_prob_reg = log_prob_reg
    # Network
    self.actor = Actor(s_dim, a_dim, hidden)
    self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr)
    self.critic = Critic(s_dim, a_dim, hidden)
    self.critic_target = copy.deepcopy(self.critic)
    self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr)
    # alpha (entropy temperature, learned automatically)
    self.target_entropy = -a_dim
    self.alpha = torch.tensor(1.0, dtype=torch.float, requires_grad=True)
    self.opt_alpha = torch.optim.Adam([self.alpha], lr=lr)
    # replay buffer, memory
    self.memory = ReplayBuffer(capacity, batch_size)
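
# target_entropy = -a_dim together with a learnable alpha is SAC's automatic
# entropy-temperature adjustment. A minimal sketch of the alpha loss,
# assuming log_prob is the actor's log-probability for actions sampled from
# the current policy:
import torch

def alpha_loss(alpha, log_prob, target_entropy):
    """Gradient pushes alpha up when policy entropy < target, down otherwise."""
    return -(alpha * (log_prob + target_entropy).detach()).mean()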
def __init__(self,
             path,
             s_dim=3,
             a_dim=1,
             hidden=64,
             actor_lr=1e-4,
             critic_lr=1e-4,
             memory_len=64,
             batch_size=32,
             update_epoch=10,
             gamma=0.9,
             lambda_=0.95,
             epsilon=0.2):
    # Parameter initialization
    self.path = path
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.hidden = hidden
    self.actor_lr = actor_lr
    self.critic_lr = critic_lr
    self.memory_len = memory_len
    self.batch_size = batch_size
    self.update_epoch = update_epoch
    self.gamma = gamma
    self.lambda_ = lambda_
    self.epsilon = epsilon
    # network initialization
    self.actor = Actor(s_dim, a_dim, hidden)
    self.actor_old = Actor(s_dim, a_dim, hidden)
    self.actor_opt = torch.optim.Adam(self.actor.parameters(), lr=self.actor_lr)
    self.critic = Critic(s_dim, hidden)
    self.critic_opt = torch.optim.Adam(self.critic.parameters(), lr=self.critic_lr)
    # memory initialization
    self.memory_s, self.memory_a, self.memory_s_, self.memory_r, self.memory_done = [], [], [], [], []
    # resume from previous results if any
    if not os.listdir(self.path + '/Net'):
        # nothing saved before; start fresh
        print('init completed')
    else:
        # load the previously saved networks and memory
        print('loading completed')
        self.actor.load_state_dict(torch.load(self.path + '/Net/Actor.pth'))
        self.critic.load_state_dict(torch.load(self.path + '/Net/Critic.pth'))
        with open(self.path + '/Net/Memory_s.json', 'r') as f:
            self.memory_s = json.load(f)
        with open(self.path + '/Net/Memory_a.json', 'r') as f:
            self.memory_a = json.load(f)
        with open(self.path + '/Net/Memory_s_.json', 'r') as f:
            self.memory_s_ = json.load(f)
        with open(self.path + '/Net/Memory_r.json', 'r') as f:
            self.memory_r = json.load(f)
        with open(self.path + '/Net/Memory_done.json', 'r') as f:
            self.memory_done = json.load(f)
    self.actor_old.load_state_dict(self.actor.state_dict())
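
# The loading branch above implies a matching save routine. A minimal sketch
# under the same path layout (Net/Actor.pth, Net/Memory_*.json); the method
# name `save` is an assumption:
import json
import torch

def save(self):
    torch.save(self.actor.state_dict(), self.path + '/Net/Actor.pth')
    torch.save(self.critic.state_dict(), self.path + '/Net/Critic.pth')
    for name, data in [('Memory_s', self.memory_s), ('Memory_a', self.memory_a),
                       ('Memory_s_', self.memory_s_), ('Memory_r', self.memory_r),
                       ('Memory_done', self.memory_done)]:
        with open(self.path + '/Net/' + name + '.json', 'w') as f:
            json.dump(data, f)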
def simulation(methods, log_dir, simu_dir):
    policy = Actor(S_DIM, A_DIM)
    value = Critic(S_DIM, A_DIM)
    config = DynamicsConfig()
    solver = Solver()
    load_dir = log_dir
    policy.load_parameters(load_dir)
    value.load_parameters(load_dir)
    statemodel_plt = Dynamics.VehicleDynamics()
    plot_length = config.SIMULATION_STEPS

    # Open-loop reference
    x_init = [0.0, 0.0, config.psi_init, 0.0, 0.0]
    op_state, op_control = solver.openLoopMpcSolver(x_init, config.NP_TOTAL)
    np.savetxt(os.path.join(simu_dir, 'Open_loop_control.txt'), op_control)

    for method in methods:
        cal_time = 0
        state = torch.tensor([[0.0, 0.0, config.psi_init, 0.0, 0.0]])
        state.requires_grad_(False)
        x_ref = statemodel_plt.reference_trajectory(state[:, -1])
        state_r = state.detach().clone()
        state_r[:, 0:4] = state_r[:, 0:4] - x_ref

        state_history = state.detach().numpy()
        control_history = []
        print('\nCALCULATION TIME:')
        for i in range(plot_length):
            if method == 'ADP':
                time_start = time.time()
                u = policy.forward(state_r[:, 0:4])
                cal_time += time.time() - time_start
            elif method == 'MPC':
                x = state_r.tolist()[0]
                time_start = time.time()
                _, control = solver.mpcSolver(x, config.NP)  # TODO: retrieve
                cal_time += time.time() - time_start
                u = np.array(control[0], dtype='float32').reshape(-1, config.ACTION_DIM)
                u = torch.from_numpy(u)
            else:
                u = np.array(op_control[i], dtype='float32').reshape(-1, config.ACTION_DIM)
                u = torch.from_numpy(u)

            state, state_r = step_relative(statemodel_plt, state, u)
            state_history = np.append(state_history, state.detach().numpy(), axis=0)
            control_history = np.append(control_history, u.detach().numpy())

        if method == 'ADP':
            print(" ADP: {:.3f}".format(cal_time) + "s")
            np.savetxt(os.path.join(simu_dir, 'ADP_state.txt'), state_history)
            np.savetxt(os.path.join(simu_dir, 'ADP_control.txt'), control_history)
        elif method == 'MPC':
            print(" MPC: {:.3f}".format(cal_time) + "s")
            np.savetxt(os.path.join(simu_dir, 'structured_MPC_state.txt'), state_history)
            np.savetxt(os.path.join(simu_dir, 'structured_MPC_control.txt'), control_history)
        else:
            np.savetxt(os.path.join(simu_dir, 'Open_loop_state.txt'), state_history)

    adp_simulation_plot(simu_dir)
    plot_comparison(simu_dir, methods)
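
# A minimal usage sketch for simulation(); the directory names and the
# 'OPEN_LOOP' label are assumptions (any label other than 'ADP' or 'MPC'
# falls through to the open-loop branch):
if __name__ == '__main__':
    simulation(methods=['ADP', 'MPC', 'OPEN_LOOP'],
               log_dir='./Results_dir/2020-10-09-14-42-10000',
               simu_dir='./Simulation_dir')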
RANDOM = False
FPS = 24
FOURCC = cv2.VideoWriter_fourcc(*'XVID')
WINDOW_SIZE = (700, 700)
HIDDEN = 64

# define the environment
env = make_env(scenario_name=ENV_NAME)
NUM = env.n
state_shape_n = [env.observation_space[i].shape[0] for i in range(env.n)]
action_shape_n = [env.action_space[i].n for i in range(env.n)]

# define the actors (the critics are not needed for evaluation)
actors = [None for _ in range(NUM)]
critics = [None for _ in range(NUM)]
for i in range(NUM):
    actors[i] = Actor(state_n=state_shape_n[i], action_n=action_shape_n[i], hidden=HIDDEN)
    actors[i].load_state_dict(
        torch.load(PATH + str(i) + '.pt', map_location=torch.device('cpu')))

# define the video writer
videoWriter = cv2.VideoWriter('./TestVideo.avi', FOURCC, FPS, WINDOW_SIZE, True)

# start to evaluate
s_n = env.reset()
for i in range(100):
    image = env.render(mode='rgb_array')
    image = np.array(image)
    image = np.reshape(image, image.shape[1:])
    r, g, b = cv2.split(image)      # the rendered frame is RGB; OpenCV expects B, G, R
    image = cv2.merge([b, g, r])
    videoWriter.write(image)
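
# The loop above only renders; to actually roll the policies forward, each
# frame would be followed by an action from every loaded actor and an
# env.step call. A minimal sketch inside the same loop body, assuming each
# Actor maps an observation tensor to the per-agent action vector that
# env.step accepts as a list:
#
#     with torch.no_grad():
#         a_n = [actors[j](torch.as_tensor(s_n[j], dtype=torch.float32)).numpy()
#                for j in range(NUM)]
#     s_n, r_n, done_n, _ = env.step(a_n)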
MAX_ITERATION = 10000   # max iterations
LR_P = 8e-4             # learning rate of policy net
LR_V = 3e-3             # learning rate of value net

# tasks
TRAIN_FLAG = 1
LOAD_PARA_FLAG = 0
SIMULATION_FLAG = 1

# Set random seed
np.random.seed(0)
torch.manual_seed(0)

# initialize policy and value net, model of vehicle dynamics
config = GeneralConfig()
policy = Actor(config.STATE_DIM, config.ACTION_DIM, lr=LR_P)
value = Critic(config.STATE_DIM, 1, lr=LR_V)
vehicleDynamics = Dynamics.VehicleDynamics()
state_batch = vehicleDynamics.initialize_state()
writer = SummaryWriter()

# Training
iteration_index = 0
if LOAD_PARA_FLAG == 1:
    print("********************************* LOAD PARAMETERS *********************************")
    # load pre-trained parameters
    load_dir = "./Results_dir/2020-10-09-14-42-10000"
    policy.load_parameters(load_dir)
    value.load_parameters(load_dir)
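
# TRAIN_FLAG gates the main loop that would follow this setup. A minimal
# sketch of its shape; update_value and update_policy are hypothetical
# helpers standing in for this repo's actual ADP update steps, which are
# not shown here:
#
#     if TRAIN_FLAG == 1:
#         for iteration_index in range(MAX_ITERATION):
#             value_loss = update_value(value, policy, vehicleDynamics, state_batch)    # hypothetical
#             policy_loss = update_policy(policy, value, vehicleDynamics, state_batch)  # hypothetical
#             writer.add_scalar('loss/value', value_loss, iteration_index)
#             writer.add_scalar('loss/policy', policy_loss, iteration_index)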
def __init__(self,
             path,
             s_dim=3,              # state-space dimension
             a_dim=1,              # action-space dimension
             hidden=64,            # hidden-layer width
             device='cuda',        # training device ('gpu' is not a valid torch device string)
             capacity=2e3,         # replay-buffer capacity
             batch_size=256,       # training batch size
             start_lr_step=512,    # steps collected before learning starts
             gamma=0.9,            # reward discount factor
             var_init=1.,          # initial exploration variance
             var_decay=0.9999,     # variance decay rate
             var_min=0.1,          # minimum variance
             actor_lr=1e-3,        # actor learning rate
             critic_lr=3e-4,       # critic learning rate
             actor_tau=0.1,        # actor soft-update rate
             critic_tau=0.2,       # critic soft-update rate
             ):
    # initialize all required parameters
    self.s_dim = s_dim
    self.a_dim = a_dim
    self.hidden = hidden
    # my current test machine has no GPU, so GPU training will be added later
    self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
    self.capacity = capacity
    self.batch_size = batch_size
    self.start_lr_step = start_lr_step
    self.gamma = gamma
    self.var = var_init
    self.var_decay = var_decay
    self.var_min = var_min
    self.actor_lr = actor_lr
    self.critic_lr = critic_lr
    self.actor_tau = actor_tau
    self.critic_tau = critic_tau
    self.path = path
    self.counter = 0  # not used yet
    # network initialization
    self.actor = Actor(s_dim, a_dim, hidden)
    self.actor_target = Actor(s_dim, a_dim, hidden)
    self.actor_opt = torch.optim.Adam(self.actor.parameters(), lr=self.actor_lr)
    self.critic = Critic(s_dim, a_dim, hidden)
    self.critic_target = Critic(s_dim, a_dim, hidden)
    self.critic_opt = torch.optim.Adam(self.critic.parameters(), lr=self.critic_lr)
    # replay-buffer initialization
    self.memory = Memory(capacity, batch_size, self.device)
    # resume from previous results if any
    if not os.listdir(self.path + '/Net'):
        # nothing saved before; start fresh
        print('init completed')
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.critic_target.load_state_dict(self.critic.state_dict())
    else:
        # load the previously saved networks and memory
        print('loading completed')
        self.actor.load_state_dict(torch.load(self.path + '/Net/Actor.pth'))
        self.actor_target.load_state_dict(torch.load(self.path + '/Net/Actor_Target.pth'))
        self.critic.load_state_dict(torch.load(self.path + '/Net/Critic.pth'))
        self.critic_target.load_state_dict(torch.load(self.path + '/Net/Critic_Target.pth'))
        with open(self.path + '/Net/Memory.json', 'r') as f:
            self.memory.memory = json.load(f)
        with open(self.path + '/Net/Counter.json', 'r') as f:
            self.memory.counter = json.load(f)
        with open(self.path + '/Net/Var.json', 'r') as f:
            self.var = json.load(f)
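
# actor_tau and critic_tau above suggest the two target networks are
# soft-updated at different rates. A minimal sketch reusing the standard
# Polyak form with a per-network rate; the function name and the agent
# attribute names are taken from the __init__ above, everything else is
# an assumption:
import torch

def soft_update_pair(agent):
    for tgt, src in zip(agent.actor_target.parameters(), agent.actor.parameters()):
        tgt.data.copy_(agent.actor_tau * src.data + (1 - agent.actor_tau) * tgt.data)
    for tgt, src in zip(agent.critic_target.parameters(), agent.critic.parameters()):
        tgt.data.copy_(agent.critic_tau * src.data + (1 - agent.critic_tau) * tgt.data)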