Example #1
    def __init__(self, env, N_STATES, N_ACTIONS, STEPS, BATCH_SIZE):
        self.N_STATES = N_STATES
        self.N_ACTIONS = N_ACTIONS
        self.STEPS = STEPS
        self.BATCH_SIZE = BATCH_SIZE  # mini-batch size
        self.critic_net = CriticNet(self.N_STATES, self.N_ACTIONS, self.STEPS,
                                    self.BATCH_SIZE)
        self.actor_net = ActorNet(self.N_STATES, self.N_ACTIONS, self.STEPS,
                                  self.BATCH_SIZE)
        self.R = []
Example #2
    def __init__(self, ob_shape, ac_shape, ac_max=5.0, ac_min=-5.0):
        self.num_states = ob_shape
        self.num_actions = ac_shape
        self.action_max = ac_max
        self.action_min = ac_min

        self.replay_buffer = deque()

        self.critic_net = CriticNet(self.num_states, self.num_actions,
                                    self.action_max, self.action_min)
        self.actor_net = ActorNet(self.num_states, self.num_actions,
                                  self.action_max)
Example #3
File: agent.py  Project: yejunhong1/DRAG
    def __init__( self, hisar_size, ar_size, action_size, TAU = 0.001, is_batch_norm = 0, write_sum = 0, net_size_scale=1, max_load=1, beta0=beta):
        self.hisar_size  = hisar_size
        self.load_size   = action_size + 1
        self.ar_size     = ar_size
        self.state_size  = action_size * 2
        self.action_size = action_size
        self.ar_action_size = ar_size + action_size

        #print("net_size_scale: "+str(net_size_scale))
        if is_batch_norm:
            if len(CN_N_HIDDENS)==2:
                self.critic_net   = CriticNet_bn(  self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            else:
                self.critic_net   = CriticNet_bn_3(  self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            self.actor_net    = ActorNet_bn(   self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            self.ar_pred_net  = ARPredNet_bn(  self.hisar_size, self.ar_size,     write_sum, net_size_scale )           # arrival rate prediction network
            self.load_map_net = LoadMapNet_bn( self.ar_size,    self.action_size, self.load_size, write_sum, net_size_scale )           # load mapping network
        else:
            self.critic_net   = CriticNet(  self.state_size, self.action_size, TAU, write_sum, net_size_scale )
            self.actor_net    = ActorNet(   self.state_size, self.action_size, TAU, write_sum, net_size_scale )
            self.ar_pred_net  = ARPredNet(  self.hisar_size, self.ar_size,     write_sum, net_size_scale )           # arrival rate prediction network
            self.load_map_net = LoadMapNet( self.ar_size,    self.action_size, self.load_size, write_sum, net_size_scale )           # load mapping network

        self.env = ENV( action_size, max_load=max_load, beta0=beta0 )

        #self.k_nearest_neighbors = int(max_actions * k_ratio )
        #Initialize Network Buffers:
        self.replay_memory_ac  = deque()
        self.replay_memory_arp = deque()
        self.replay_memory_lm  = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter   = 0
        
        action_max    = np.ones(  ( self.action_size ) ).tolist()
        action_min    = np.zeros( ( self.action_size ) ).tolist()
        action_bounds = [action_max, action_min] 
        self.grad_inv = grad_inverter( action_bounds )
Example #4
    def __init__(self, num_states, num_actions, action_space_high,
                 action_space_low, is_batch_norm):

        self.num_states = num_states
        self.num_actions = num_actions
        self.action_space_high = action_space_high
        self.action_space_low = action_space_low

        # Batch normalisation disabled.
        self.critic_net = CriticNet(self.num_states, self.num_actions)
        self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay memory
        self.replay_memory = deque()

        # Initialize time step
        self.time_step = 0
        self.counter = 0

        action_max = np.array(action_space_high).tolist()
        action_min = np.array(action_space_low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
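
Most of these agents hand the action bounds to a grad_inverter, the gradient-inverting trick for bounded continuous actions (Hausknecht & Stone, 2016): the critic's action gradient is scaled by the action's distance to whichever bound it is being pushed toward. Below is a minimal NumPy sketch of that idea, assuming action_bounds is the [max_list, min_list] pair built above; the actual grad_inverter classes in these projects are TensorFlow graphs and are not shown here, so this is only an illustration of the formula, not their implementation.

import numpy as np

def invert_gradients(grads, actions, action_max, action_min):
    # Gradient-inverting sketch: gradients pushing an action up are scaled by
    # its remaining distance to the upper bound, gradients pushing it down by
    # its distance to the lower bound, so actions do not saturate at the limits.
    grads = np.asarray(grads, dtype=float)
    actions = np.asarray(actions, dtype=float)
    action_max = np.asarray(action_max, dtype=float)
    action_min = np.asarray(action_min, dtype=float)
    width = action_max - action_min
    scale_up = (action_max - actions) / width
    scale_down = (actions - action_min) / width
    return np.where(grads >= 0, grads * scale_up, grads * scale_down)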
Example #5
    def __init__(self, env, is_batch_norm=False, is_grad_inverter=True):
        super().__init__(env)
        assert isinstance(env.action_space, Box), "action space must be continuous"
        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.observation_space_size,
                                           self.action_space_size)
            self.actor_net = ActorNet_bn(self.observation_space_size,
                                         self.action_space_size)

        else:
            self.critic_net = CriticNet(self.observation_space_size,
                                        self.action_space_size)
            self.actor_net = ActorNet(self.observation_space_size,
                                      self.action_space_size)

        self.is_grad_inverter = is_grad_inverter
        self.replay_memory = deque()

        self.time_step = 0

        action_max = np.array(self.high).tolist()
        action_min = np.array(self.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
Example #6
    def __init__(self, env, is_batch_norm):
        self.env = env
        self.num_states = env.observation_space.shape[0]
        self.num_actions = env.action_space.shape[0]

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)

        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_max = np.array(env.action_space.high).tolist()
        action_min = np.array(env.action_space.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
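
For the Gym-based variants such as Examples #5 and #6, a typical instantiation looks roughly like the following. DDPG is a hypothetical name for the agent class these __init__ methods belong to; the actual class names are not shown in the snippets.

import gym

env = gym.make('Pendulum-v0')            # any continuous-control task with a Box action space
agent = DDPG(env, is_batch_norm=False)   # builds actor/critic nets, replay memory, grad inverter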
Example #7
    def __init__(self, env, is_batch_norm):
        self.env = env
        self.num_states = 1
        self.num_actions = 3

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)

        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_max = [75 + 210, 10 + 160]
        action_min = [75, 10]
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
Example #8
    def __init__(self, env, is_batch_norm):
        self.env = env
        self.num_states = 32 * 16
        self.num_actions = 2

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)

        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay buffer:
        self.replay_memory = deque()

        # Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_bounds = [[1., 1.], [-1., -1.]]
        self.grad_inv = grad_inverter(action_bounds)
Example #9
RENDER_ENV = True
GYM_MONITOR_EN = True
ENV_NAME = 'Pendulum-v0'
MONITOR_DIR = './results/gym_ddpg'
ACTION_BOUND = 2
ou = OU()

if __name__ == '__main__':

    env = gym.make(ENV_NAME).env

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    action_bound = env.action_space.high[0]
    actor = ActorNet(state_dim, HIDDEN1_UNITS, HIDDEN2_UNITS, action_dim)
    critic = CriticNet(state_dim, action_dim, HIDDEN1_UNITS, HIDDEN2_UNITS,
                       HIDDEN2_UNITS, action_dim)
    buff = Memory(BUFFER_SIZE, 9)
    step = 0
    reward_result = []

    for i in range(MAX_EPISODES):

        s_t = env.reset()
        s_t = np.reshape(s_t, (1, 3))[0]
        total_reward = 0.
        for j in range(MAX_EP_STEPS):
            loss = 0
            if RENDER_ENV:
                env.render()
            a_t = actor.predict(s_t, ACTION_BOUND, target=False)
            action = a_t + ou.sample(a_t[0])
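
The listing is cut off after the exploration step. A hypothetical continuation of the inner loop is sketched below; it assumes the classic Gym env.step API and leaves the replay-buffer insertion and the actor/critic updates as comments, because the Memory, ActorNet, and CriticNet interfaces are project-specific and not shown above.

            # Hypothetical continuation (not part of the original listing).
            s_t1, r_t, done, info = env.step(action)
            s_t1 = np.reshape(s_t1, (1, 3))[0]

            # The original presumably stores (s_t, action, r_t, s_t1, done) in
            # `buff` here and then runs the DDPG critic/actor updates; the
            # exact calls depend on the Memory/ActorNet/CriticNet interfaces.

            total_reward += r_t
            s_t = s_t1
            step += 1
            if done:
                break

        reward_result.append(total_reward)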