def __init__(self, s_dim, a_dim, bound, hidden, device, lr, memory_len,
                 batch_size, update_epoch, gamma, lambda_, epsilon):
        # Parameter initialization
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.bound = bound
        self.hidden = hidden
        self.device = torch.device(
            device if torch.cuda.is_available() else 'cpu')
        self.lr = lr
        self.memory_len = memory_len
        self.batch_size = batch_size
        self.update_epoch = update_epoch
        self.gamma = gamma
        self.lambda_ = lambda_
        self.epsilon = epsilon

        # network initialization
        self.actor = Actor(s_dim, a_dim, hidden).to(self.device)
        self.actor_old = Actor(s_dim, a_dim, hidden).to(self.device)
        self.actor_old.load_state_dict(self.actor.state_dict())
        self.actor_opt = torch.optim.Adam(self.actor.parameters(), lr=self.lr)
        self.critic = Critic(s_dim).to(self.device)
        self.critic_opt = torch.optim.Adam(self.critic.parameters(),
                                           lr=self.lr)

        # memory initialization
        self.memory_s, self.memory_a, self.memory_s_, self.memory_r, self.memory_done = [], [], [], [], []
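
A minimal, illustrative sketch (not part of the source) of how the epsilon and actor_old fields above are typically used in a PPO clipped-surrogate loss; dist_new/dist_old are assumed to be torch.distributions objects returned by the Actor:

import torch

def ppo_clip_loss(dist_new, dist_old, actions, advantages, epsilon):
    # probability ratio between the current policy and the frozen old policy
    ratio = torch.exp(dist_new.log_prob(actions) - dist_old.log_prob(actions).detach())
    clipped = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon)
    # maximize the clipped surrogate, i.e. minimize its negative
    return -torch.min(ratio * advantages, clipped * advantages).mean()
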
Example #2
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(random_seed)
        self.seed = random_seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
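
The OUNoise class used above is not shown in this snippet; a self-contained sketch of a typical Ornstein-Uhlenbeck process it might implement (the parameter values are my assumptions, not the source's):

import numpy as np

class OUNoiseSketch:
    """Ornstein-Uhlenbeck process: dx = theta * (mu - x) + sigma * N(0, 1)."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        # start each episode at the long-run mean
        self.state = self.mu.copy()

    def sample(self):
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * self.rng.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state
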
Example #3
    def __init__(self, s_dim, a_dim, device, hidden, capacity, batch_size,
                 lr_actor, lr_critic, variance_start, variance_decay,
                 variance_min, gamma, tau):
        # Parameter Initialization
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.device = device
        self.hidden = hidden
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.capacity = capacity
        self.batch_size = batch_size
        self.var = variance_start
        self.var_decay = variance_decay
        self.var_min = variance_min
        self.gamma = gamma
        self.tau = tau

        # Network
        self.actor = Actor(s_dim, hidden, a_dim).to(device)
        self.actor_target = Actor(s_dim, hidden, a_dim).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
        self.critic = Critic(s_dim, a_dim, hidden).to(device)
        self.critic_target = Critic(s_dim, a_dim, hidden).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.opt_critic = torch.optim.Adam(self.critic.parameters(),
                                           lr=lr_critic)

        # replay buffer, or memory
        self.memory = ReplayBuffer(capacity, batch_size, device)
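
The tau stored above is the soft-update coefficient for the target networks; a hedged sketch (my illustration, not the source code) of the Polyak update it implies:

def soft_update(target_net, online_net, tau):
    # target <- tau * online + (1 - tau) * target, applied parameter-wise
    for t_param, param in zip(target_net.parameters(), online_net.parameters()):
        t_param.data.copy_(tau * param.data + (1.0 - tau) * t_param.data)
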
Example #4
    def __init__(self, state_space, action_space):
        self.actor = Actor(state_space, action_space).to(device)
        self.critic = Critic(state_space, action_space).to(device)

        self.actor_target = Actor(state_space, action_space).to(device)
        self.critic_target = Critic(state_space, action_space).to(device)

        self.actor_optimiser = optim.Adam(self.actor.parameters(), lr=1e-3)
        self.critic_optimiser = optim.Adam(self.critic.parameters(), lr=1e-3)

        self.mem = ReplayBuffer(buffer_size)
Example #5
    def __init__(self,
                 device,
                 agent_num,
                 state_shape_n,
                 action_shape_n,
                 gamma,
                 tau,
                 max_grad_norm,
                 hidden,
                 lr_a,
                 lr_c,
                 buffer_capacity,
                 batch_size
                 ):
        # hyper parameters
        self.device = device
        self.agent_num = agent_num
        self.state_shape_n = state_shape_n
        self.action_shape_n = action_shape_n
        self.gamma = gamma
        self.tau = tau
        self.max_grad_norm = max_grad_norm
        # define all the actor/critic networks
        self.actors = [None for _ in range(self.agent_num)]
        self.critics = [None for _ in range(self.agent_num)]
        self.actors_target = [None for _ in range(self.agent_num)]
        self.critics_target = [None for _ in range(self.agent_num)]
        self.optimizers_a = [None for _ in range(self.agent_num)]
        self.optimizers_c = [None for _ in range(self.agent_num)]
        for i in range(self.agent_num):
            # define actor for the i-th agent
            self.actors[i] = Actor(state_n=state_shape_n[i],
                                   action_n=action_shape_n[i],
                                   hidden=hidden).to(self.device)
            self.actors_target[i] = Actor(state_n=state_shape_n[i],
                                          action_n=action_shape_n[i],
                                          hidden=hidden).to(self.device)
            self.actors_target[i].load_state_dict(self.actors[i].state_dict())
            self.optimizers_a[i] = Adam(self.actors[i].parameters(), lr_a)
            # define critic for the i-th agent (centralized: takes all agents' states and actions)
            self.critics[i] = Critic(state_n=sum(state_shape_n),
                                     action_n=sum(action_shape_n),
                                     hidden=hidden).to(self.device)
            self.critics_target[i] = Critic(state_n=sum(state_shape_n),
                                            action_n=sum(action_shape_n),
                                            hidden=hidden).to(self.device)
            self.critics_target[i].load_state_dict(self.critics[i].state_dict())
            self.optimizers_c[i] = Adam(self.critics[i].parameters(), lr_c)
        # define the memory
        self.batch_size = batch_size
        self.memory = ReplayBuffer(capacity=buffer_capacity, batch_size=batch_size)
    def __init__(
            self,
            s_dim,
            a_num,
            device,
            hidden,
            lr_actor,
            lr_critic,
            memory_len,
            gamma,
            lambda_,
    ):
        # Parameter Initialization
        self.s_dim = s_dim
        self.a_num = a_num
        self.device = device
        self.hidden = hidden
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.memory_len = memory_len
        self.gamma = gamma
        self.lambda_ = lambda_

        # network initialization
        self.actor = Actor(s_dim, hidden, a_num).to(self.device)
        self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
        self.critic = Critic(s_dim, hidden).to(self.device)
        self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)

        # no replay buffer in this algorithm, only per-trajectory lists
        self.memory_s = []
        self.memory_a = []
        self.memory_s_ = []
        self.memory_r = []
        self.memory_done = []
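
The gamma and lambda_ parameters above suggest generalized advantage estimation over the stored trajectory; a minimal sketch of that computation (my assumption about how the buffers are consumed, not the source code):

def gae_advantages(rewards, values, next_values, dones, gamma, lambda_):
    # backward recursion: delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    advantages, gae = [], 0.0
    for r, v, v_next, done in zip(reversed(rewards), reversed(values),
                                  reversed(next_values), reversed(dones)):
        delta = r + gamma * v_next * (1.0 - done) - v
        gae = delta + gamma * lambda_ * (1.0 - done) * gae
        advantages.insert(0, gae)
    return advantages
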
Example #7
    def __init__(self, s_dim, a_dim, capacity, batch_size, lr_actor, lr_critic,
                 hidden, var_init, var_decay, var_min, gamma, tau,
                 policy_noise, noise_clip, policy_freq):
        # Parameter Initialization
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.hidden = hidden
        self.capacity = capacity
        self.batch_size = batch_size
        self.var = var_init
        self.var_decay = var_decay
        self.var_min = var_min
        self.gamma = gamma
        self.tau = tau
        self.policy_noise = policy_noise
        self.noise_clip = noise_clip
        self.policy_freq = policy_freq
        self.train_it = 0

        # Network
        self.actor = Actor(s_dim, a_dim, hidden)
        self.actor_target = copy.deepcopy(self.actor)
        self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)

        self.critic = Critic(s_dim, a_dim, hidden)
        self.critic_target = copy.deepcopy(self.critic)
        self.opt_critic = torch.optim.Adam(self.critic.parameters(),
                                           lr=lr_critic)

        # replay buffer, or memory
        self.memory = ReplayBuffer(capacity, batch_size)
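
policy_noise, noise_clip and policy_freq above are TD3 hyperparameters; a self-contained sketch of the target-policy smoothing they imply, assuming actions live in [-1, 1] (the source does not state the bound):

import torch

def smoothed_target_action(actor_target, next_states, policy_noise, noise_clip, a_max=1.0):
    # add clipped Gaussian noise to the target action, then clamp to the action range
    with torch.no_grad():
        a_next = actor_target(next_states)
        noise = (torch.randn_like(a_next) * policy_noise).clamp(-noise_clip, noise_clip)
        return (a_next + noise).clamp(-a_max, a_max)
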
Example #8
    def __init__(
        self,
        s_dim,
        a_num,
        device,
        hidden,
        lr_actor,
        lr_critic,
        max_len,
        gamma,
    ):
        # Parameter Initialization
        self.s_dim = s_dim
        self.a_num = a_num
        self.device = device
        self.hidden = hidden
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.max_len = max_len
        self.gamma = gamma

        # network initialization
        self.actor = Actor(s_dim, hidden, a_num).to(self.device)
        self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
        self.critic = Critic(s_dim, hidden).to(self.device)
        self.opt_critic = torch.optim.Adam(self.critic.parameters(),
                                           lr=lr_critic)

        # define memory
        self.memory_s = []
        self.memory_a = []
        self.memory_r = []
    def __init__(self, s_dim, a_dim, hidden, capacity, batch_size, lr, gamma,
                 tau, log_prob_reg):
        # Parameter Initialization
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.hidden = hidden
        self.lr = lr
        self.capacity = capacity
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.log_prob_reg = log_prob_reg

        # Network
        self.actor = Actor(s_dim, a_dim, hidden)
        self.opt_actor = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.critic = Critic(s_dim, a_dim, hidden)
        self.critic_target = copy.deepcopy(self.critic)
        self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=lr)
        # alpha
        self.target_entropy = -a_dim
        self.alpha = torch.tensor(1, dtype=torch.float, requires_grad=True)
        self.opt_alpha = torch.optim.Adam([self.alpha], lr=lr)
        # replay buffer, memory
        self.memory = ReplayBuffer(capacity, batch_size)
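
target_entropy, alpha and opt_alpha above belong to the SAC temperature update; an illustrative sketch of one common form of that update (log_pi is an assumed tensor of log-probabilities of freshly sampled actions):

def update_alpha(alpha, opt_alpha, log_pi, target_entropy):
    # adjust alpha so the policy entropy is pushed toward target_entropy
    alpha_loss = -(alpha * (log_pi + target_entropy).detach()).mean()
    opt_alpha.zero_grad()
    alpha_loss.backward()
    opt_alpha.step()
    return alpha_loss.item()
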
Example #10
    def __init__(self,
                 path,
                 s_dim=3,
                 a_dim=1,
                 hidden=64,
                 actor_lr=1e-4,
                 critic_lr=1e-4,
                 memory_len=64,
                 batch_size=32,
                 update_epoch=10,
                 gamma=0.9,
                 lambda_=0.95,
                 epsilon=0.2):
        # Parameter initialization
        self.path = path
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.hidden = hidden
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.memory_len = memory_len
        self.batch_size = batch_size
        self.update_epoch = update_epoch
        self.gamma = gamma
        self.lambda_ = lambda_
        self.epsilon = epsilon

        # network initialization
        self.actor = Actor(s_dim, a_dim, hidden)
        self.actor_old = Actor(s_dim, a_dim, hidden)
        self.actor_opt = torch.optim.Adam(self.actor.parameters(),
                                          lr=self.actor_lr)
        self.critic = Critic(s_dim, hidden)
        self.critic_opt = torch.optim.Adam(self.critic.parameters(),
                                           lr=self.critic_lr)

        # memory initialization
        self.memory_s, self.memory_a, self.memory_s_, self.memory_r, self.memory_done = [], [], [], [], []

        # check whether previous results can be inherited
        if not os.listdir(self.path + '/Net'):
            # nothing previous to inherit
            print('init completed')
        else:
            # inherit the previous networks and memory
            print('loading completed')
            self.actor.load_state_dict(torch.load(self.path +
                                                  '/Net/Actor.pth'))
            self.critic.load_state_dict(
                torch.load(self.path + '/Net/Critic.pth'))
            with open(self.path + '/Net/Memory_s.json', 'r') as f:
                self.memory_s = json.load(f)
            with open(self.path + '/Net/Memory_a.json', 'r') as f:
                self.memory_a = json.load(f)
            with open(self.path + '/Net/Memory_s_.json', 'r') as f:
                self.memory_s_ = json.load(f)
            with open(self.path + '/Net/Memory_r.json', 'r') as f:
                self.memory_r = json.load(f)
            with open(self.path + '/Net/Memory_done.json', 'r') as f:
                self.memory_done = json.load(f)
        self.actor_old.load_state_dict(self.actor.state_dict())
Example #11
def simulation(methods, log_dir, simu_dir):
    policy = Actor(S_DIM, A_DIM)
    value = Critic(S_DIM, A_DIM)
    config = DynamicsConfig()
    solver = Solver()
    load_dir = log_dir
    policy.load_parameters(load_dir)
    value.load_parameters(load_dir)
    statemodel_plt = Dynamics.VehicleDynamics()
    plot_length = config.SIMULATION_STEPS

    # initial_state = torch.tensor([[0.5, 0.0, config.psi_init, 0.0, 0.0]])
    # baseline = Baseline(initial_state, simu_dir)
    # baseline.mpcSolution()
    # baseline.openLoopSolution()

    # Open-loop reference
    x_init = [0.0, 0.0, config.psi_init, 0.0, 0.0]
    op_state, op_control = solver.openLoopMpcSolver(x_init, config.NP_TOTAL)
    np.savetxt(os.path.join(simu_dir, 'Open_loop_control.txt'), op_control)

    for method in methods:
        cal_time = 0
        state = torch.tensor([[0.0, 0.0, config.psi_init, 0.0, 0.0]])
        # state = torch.tensor([[0.0, 0.0, 0.0, 0.0, 0.0]])
        state.requires_grad_(False)
        # x_ref = statemodel_plt.reference_trajectory(state[:, -1])
        x_ref = statemodel_plt.reference_trajectory(state[:, -1])
        state_r = state.detach().clone()
        state_r[:, 0:4] = state_r[:, 0:4] - x_ref

        state_history = state.detach().numpy()
        control_history = []

        print('\nCALCULATION TIME:')
        for i in range(plot_length):
            if method == 'ADP':
                time_start = time.time()
                u = policy.forward(state_r[:, 0:4])
                cal_time += time.time() - time_start
            elif method == 'MPC':
                x = state_r.tolist()[0]
                time_start = time.time()
                _, control = solver.mpcSolver(x, config.NP)  # TODO: retrieve
                cal_time += time.time() - time_start
                u = np.array(control[0],
                             dtype='float32').reshape(-1, config.ACTION_DIM)
                u = torch.from_numpy(u)
            else:
                u = np.array(op_control[i],
                             dtype='float32').reshape(-1, config.ACTION_DIM)
                u = torch.from_numpy(u)

            state, state_r = step_relative(statemodel_plt, state, u)
            # state_next, deri_state, utility, F_y1, F_y2, alpha_1, alpha_2 = statemodel_plt.step(state, u)
            # state_r_old, _, _, _, _, _, _ = statemodel_plt.step(state_r, u)
            # state_r = state_r_old.detach().clone()
            # state_r[:, [0, 2]] = state_next[:, [0, 2]]
            # x_ref = statemodel_plt.reference_trajectory(state_next[:, -1])
            # state_r[:, 0:4] = state_r[:, 0:4] - x_ref
            # state = state_next.clone().detach()
            # s = state_next.detach().numpy()
            state_history = np.append(state_history,
                                      state.detach().numpy(),
                                      axis=0)
            control_history = np.append(control_history, u.detach().numpy())

        if method == 'ADP':
            print(" ADP: {:.3f}".format(cal_time) + "s")
            np.savetxt(os.path.join(simu_dir, 'ADP_state.txt'), state_history)
            np.savetxt(os.path.join(simu_dir, 'ADP_control.txt'),
                       control_history)

        elif method == 'MPC':
            print(" MPC: {:.3f}".format(cal_time) + "s")
            np.savetxt(os.path.join(simu_dir, 'structured_MPC_state.txt'),
                       state_history)
            np.savetxt(os.path.join(simu_dir, 'structured_MPC_control.txt'),
                       control_history)

        else:
            np.savetxt(os.path.join(simu_dir, 'Open_loop_state.txt'),
                       state_history)

    adp_simulation_plot(simu_dir)
    plot_comparison(simu_dir, methods)
RANDOM = False
FPS = 24
FOURCC = cv2.VideoWriter_fourcc(*'XVID')
WINDOW_SIZE = (700, 700)
HIDDEN = 64
# define the environment
env = make_env(scenario_name=ENV_NAME)
NUM = env.n
state_shape_n = [env.observation_space[i].shape[0] for i in range(env.n)]
action_shape_n = [env.action_space[i].n for i in range(env.n)]
# define the actor
actors = [None for _ in range(NUM)]
critics = [None for _ in range(NUM)]
for i in range(NUM):
    actors[i] = Actor(state_n=state_shape_n[i],
                      action_n=action_shape_n[i],
                      hidden=HIDDEN)
    actors[i].load_state_dict(
        torch.load(PATH + str(i) + '.pt', map_location=torch.device('cpu')))
# define the video writer.
videoWriter = cv2.VideoWriter('./TestVideo.avi', FOURCC, FPS, WINDOW_SIZE,
                              True)
# start to evaluate
s_n = env.reset()
for i in range(100):
    image = env.render(mode='rgb_array')
    image = np.array(image)
    image = np.reshape(image, image.shape[1:])
    r, g, b = cv2.split(image)  # split the RGB frame; OpenCV expects B, G, R channel order
    image = cv2.merge([b, g, r])
    videoWriter.write(image)
MAX_ITERATION = 10000  # max iterations
LR_P = 8e-4  # learning rate of policy net
LR_V = 3e-3  # learning rate of value net

# tasks
TRAIN_FLAG = 1
LOAD_PARA_FLAG = 0
SIMULATION_FLAG = 1

# Set random seed
np.random.seed(0)
torch.manual_seed(0)

# initialize policy and value net, model of vehicle dynamics
config = GeneralConfig()
policy = Actor(config.STATE_DIM, config.ACTION_DIM, lr=LR_P)
value = Critic(config.STATE_DIM, 1, lr=LR_V)
vehicleDynamics = Dynamics.VehicleDynamics()
state_batch = vehicleDynamics.initialize_state()
writer = SummaryWriter()

# Training
iteration_index = 0
if LOAD_PARA_FLAG == 1:
    print(
        "********************************* LOAD PARAMETERS *********************************"
    )
    # load pre-trained parameters
    load_dir = "./Results_dir/2020-10-09-14-42-10000"
    policy.load_parameters(load_dir)
    value.load_parameters(load_dir)
Example #14
    def __init__(self,
                 path,
                 s_dim=3,              # state-space dimension
                 a_dim=1,              # action-space dimension
                 hidden=64,            # hidden-layer width
                 device='cuda',        # training device
                 capacity=2e3,         # replay-memory size
                 batch_size=256,       # training batch size
                 start_lr_step=512,    # step at which learning starts
                 gamma=0.9,            # reward discount factor
                 var_init=1.,          # initial exploration variance
                 var_decay=0.9999,     # variance decay rate
                 var_min=0.1,          # minimum variance
                 actor_lr=1e-3,        # actor learning rate
                 critic_lr=3e-4,       # critic learning rate
                 actor_tau=0.1,        # actor soft-update rate
                 critic_tau=0.2,       # critic soft-update rate
                 ):
        # initialize all required parameters
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.hidden = hidden
        # my current test machine has no GPU, so GPU training will be added later
        self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
        self.capacity = capacity
        self.batch_size = batch_size
        self.start_lr_step = start_lr_step
        self.gamma = gamma
        self.var = var_init
        self.var_decay = var_decay
        self.var_min = var_min
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.actor_tau = actor_tau
        self.critic_tau = critic_tau
        # not used yet
        self.path = path
        self.counter = 0

        # initialize the networks
        self.actor = Actor(s_dim, a_dim, hidden)
        self.actor_target = Actor(s_dim, a_dim, hidden)
        self.actor_opt = torch.optim.Adam(self.actor.parameters(), lr=self.actor_lr)
        self.critic = Critic(s_dim, a_dim, hidden)
        self.critic_target = Critic(s_dim, a_dim, hidden)
        self.critic_opt = torch.optim.Adam(self.critic.parameters(), lr=self.critic_lr)

        # initialize the replay memory
        self.memory = Memory(capacity, batch_size, self.device)

        # check whether previous results can be inherited
        if not os.listdir(self.path + '/Net'):
            # nothing previous to inherit
            print('init completed')
            self.actor_target.load_state_dict(self.actor.state_dict())
            self.critic_target.load_state_dict(self.critic.state_dict())
        else:
            # inherit the previous networks and memory
            print('loading completed')
            self.actor.load_state_dict(torch.load(self.path + '/Net/Actor.pth'))
            self.actor_target.load_state_dict(torch.load(self.path + '/Net/Actor_Target.pth'))
            self.critic.load_state_dict(torch.load(self.path + '/Net/Critic.pth'))
            self.critic_target.load_state_dict(torch.load(self.path + '/Net/Critic_Target.pth'))
            with open(self.path + '/Net/Memory.json', 'r') as f:
                self.memory.memory = json.load(f)
            with open(self.path + '/Net/Counter.json', 'r') as f:
                self.memory.counter = json.load(f)
            with open(self.path + '/Net/Var.json', 'r') as f:
                self.var = json.load(f)