Code Example #1
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.3
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.gamma = 0.99
        self.tau = 0.001 
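
All of these examples construct an OUNoise(size, mu, theta, sigma) object without showing the class itself. For reference, a minimal sketch of the Ornstein-Uhlenbeck process this interface implies, based on the common Udacity starter code (the individual projects' versions may differ):

import copy
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta  # how strongly the process is pulled back toward mu
        self.sigma = sigma  # volatility of the random perturbation
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise value."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state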
Code Example #2
    def __init__(self, cfg):
        # Environment configuration
        self.action_shape = cfg['env']['action_shape']

        # Replay memory
        cfg['agent']['memory']['action_shape'] = self.action_shape
        self.memory = ReplayBuffer(**cfg['agent']['memory'])

        # Algorithm parameters
        self.exploration_mu, self.exploration_sigma = cfg['agent']['noise']

        self.gamma = cfg['agent']['gamma']
        self.tau = cfg['agent']['tau']

        state_flatten_shape = [np.prod(self.memory.flatten_state_shape)]
        # Actor Model
        self.actor = Actor(state_flatten_shape, self.action_shape,
                           cfg['env']['action_range'], self.tau,
                           self.memory.batch_size, cfg['actor'])

        # Critic Model
        self.critic = Critic(state_flatten_shape, self.action_shape, self.tau,
                             cfg['critic'])

        # Flag & Counter
        self.training = True
        self.episode = 0
        self.max_episode_explore = cfg['agent']['explore']
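
Code Example #2 takes a nested cfg dict instead of a task object. A hypothetical configuration illustrating the keys its constructor reads (the nesting follows the code above, but all values and the key names inside 'memory' are illustrative assumptions, not from the source project):

cfg = {
    'env': {
        'action_shape': (4,),          # becomes self.action_shape
        'action_range': (0.0, 900.0),  # forwarded to the Actor
    },
    'agent': {
        # 'action_shape' is injected into this sub-dict by the constructor
        'memory': {'buffer_size': 100000, 'batch_size': 64},
        'noise': (0.0, 0.2),           # unpacked as (exploration_mu, exploration_sigma)
        'gamma': 0.99,
        'tau': 0.001,
        'explore': 100,                # becomes self.max_episode_explore
    },
    'actor': {},   # model-specific hyperparameters
    'critic': {},  # model-specific hyperparameters
}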
Code Example #3
File: agent.py Project: DunnyDon/UdacityML
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.2
        self.exploration_sigma = 0.33
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 128
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.9  # discount factor
        self.tau = 0.0015  # for soft update of target parameters
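
The ReplayBuffer(buffer_size, batch_size) used throughout these examples is likewise not shown. A minimal sketch consistent with how the constructors call it, again following the common Udacity starter code:

import random
from collections import deque, namedtuple

class ReplayBuffer:
    """Fixed-size buffer of experience tuples for off-policy learning."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)  # oldest experiences drop off first
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        """Store a single transition."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a uniform random minibatch."""
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)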
Code Example #4
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
        
        self.exploration_mu = 0
        self.exploration_theta = 0.10  # low pull toward the mean, so noise drifts in one direction longer
        self.exploration_sigma = 0.001  # scale of the random noise
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.gamma = 0.90  # discount factor
        self.tau = 0.1  # for soft update of target parameters

        self.best_score = -np.inf
        self.score = 0
Code Example #5
File: agent.py Project: spb07/RL-Quadcopter-2
    def __init__(self, task, gamma=0.99 , tau=0.01, buffer_size=100000, batch_size=64 ):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0       # mean/equilibrium value
        self.exploration_theta = 0.15 # how fast the variable returns towards the mean
        self.exploration_sigma = 0.2  # volatility of the process
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma
        self.tau = tau
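
Every example stores tau "for soft update of target parameters", but none shows the update itself. A sketch of the soft update such agents typically perform after each learning step (assumed from the standard DDPG recipe, not copied from these projects):

    def soft_update(self, local_model, target_model):
        """Blend target weights toward local weights:
        theta_target <- tau * theta_local + (1 - tau) * theta_target
        """
        local_weights = local_model.get_weights()
        target_weights = target_model.get_weights()
        new_weights = [self.tau * lw + (1.0 - self.tau) * tw
                       for lw, tw in zip(local_weights, target_weights)]
        target_model.set_weights(new_weights)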
Code Example #6
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        # Create the actor instances for local and target
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        # Create the critic instances for local and target
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        # Initialize target weights from the local models
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        # Noise hyperparameters
        self.exploration_mu = 0
        self.exploration_theta = 0.35
        self.exploration_sigma = 0.1
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)
        # Set the replay memory
        self.buffer_size = 100000
        self.batch_size = 32
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        # Update function hyperparameters
        self.gamma = 0.99
        self.tau = 0.001
Code Example #7
    def __init__(self,
                 task,
                 buffer_size=100000,
                 batch_size=64,
                 gamma=0.99,
                 tau=0.01):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 learning_rate=1e-3)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  learning_rate=1e-3)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   learning_rate=1e-4)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    learning_rate=1e-4)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(size=self.action_size)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size  # 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # 0.99 discount factor
        self.tau = tau  # 0.01 for soft update of target parameters

        # Initialize
        self.last_state = None
        self.total_reward = 0.0

        # Score tracker and learning parameters
        self.score = 0
        self.best_score = -np.inf
        self.count = 0
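
For context, a hypothetical act() that pairs with constructors like the one above: the deterministic policy output plus a sample from the OU process for exploration. This is the standard DDPG behavior (assuming numpy as np), not code taken from this project:

    def act(self, state):
        """Return the policy's action for this state, perturbed by exploration noise."""
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]
        return list(action + self.noise.sample())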
Code Example #8
File: agent.py Project: SiweiWang/RL-Quadcopter
    def __init__(self, task, train=True):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Set the learning rate suggested by paper:  https://pdfs.semanticscholar.org/71f2/03de1a53deae81a7707143f0ed564661e279.pdf
        self.actor_learning_rate = 0.001
        self.actor_decay = 0.0
        self.critic_learning_rate = 0.001
        self.critic_decay = 0.0

        # Actor Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high, self.actor_learning_rate, self.actor_decay)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high, self.actor_learning_rate, self.actor_decay)

        # Critic Model
        self.critic_local = Critic(self.state_size, self.action_size, self.critic_learning_rate, self.critic_decay)
        self.critic_target = Critic(self.state_size, self.action_size, self.critic_learning_rate, self.critic_decay)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.01  # reduced from the usual 0.15
        self.exploration_sigma = 0.02  # reduced from the usual 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.best_w = None
        self.best_score = -np.inf
        # self.noise_scale = 0.7
        self.score = 0

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01 # for soft update of target parameters

        # Indicate if we want to learn (or use to predict without learn)
        self.set_train(train)
Code Example #9
    def __init__(self, task, seed=None, render=False):

        self.env = task.env
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        self.total_reward = 0
        self.steps = 0
        self.action_repeat = 3
        self.render = render

        # Score tracker and learning parameters
        self.score = -np.inf
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # counter
        self.count = 0

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(1, self.exploration_mu, self.exploration_theta,
                             self.exploration_sigma)  # note: 1-dimensional noise, unlike the action_size-dimensional noise used in the other examples

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
Code Example #10
File: ddpg.py Project: Jwata/RL-Quadcopter-2
    def __init__(self,
                 task,
                 actor_learning_rate=0.001,
                 critic_learning_rate=0.001,
                 tau=0.01,
                 gamma=0.99,
                 buffer_size=100000,
                 batch_size=64,
                 exploration_mu=0,
                 exploration_theta=0.15,
                 exploration_sigma=0.2):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high,
                                 actor_learning_rate)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high,
                                  actor_learning_rate)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   critic_learning_rate)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    critic_learning_rate)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = exploration_mu
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters
Code Example #11
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2

        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        self.closeCount = 0

        # Score tracker
        self.best_score = -np.inf
        self.total_reward = 0.0

        self.currentclose = 0
Code Example #12
    def __init__(self, task, sess):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        # Algorithm parameters
        self.gamma = 0.9  # discount factor
        self.tau = 2e-3  # for soft update of target parameters
        self.actor_lr = 2e-3
        self.critic_lr = 2e-3
        self.reward_variance = RunningVariance(1)
        self.q_values_variance = RunningVariance(1)

        # Actor (Policy) Model
        self.actor_local = Actor(sess, self.state_size, self.action_size,
                                 self.action_low, self.action_high,
                                 self.actor_lr)
        self.actor_target = Actor(sess, self.state_size, self.action_size,
                                  self.action_low, self.action_high,
                                  self.actor_lr)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.critic_lr)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.critic_lr)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.8
        self.exploration_sigma = 0.05 * self.action_range
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 100
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # initialize
        sess.run(tf.global_variables_initializer())
Code Example #13
File: agent.py Project: thenhz/RL-Quadcopter-2
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        actor_local_params = actor_params()
        actor_target_params = actor_params()
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 params=actor_local_params)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  params=actor_target_params)

        # Critic (Value) Model
        critic_local_params = critic_params()
        critic_target_params = critic_params()
        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   params=critic_local_params)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    params=critic_target_params)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        agent_par = agent_params(self.action_size)
        # Noise process
        self.noise = agent_par.noise
        # Replay memory
        self.batch_size = agent_par.batch_size
        self.memory = agent_par.memory
        # Algorithm parameters
        self.gamma = agent_par.gamma  # discount factor
        self.tau = agent_par.tau  # for soft update of target parameters

        # Compute the ongoing top score
        self.top_score = -np.inf
        self.score = 0
Code Example #14
    def __init__(self, task, verbose=False):
        self.verbose = verbose

        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0.1
        self.exploration_theta = 0.2
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 512
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.015  # for soft update of target parameters
Code Example #15
File: agent.py Project: Rababalkhalifa/QuadCop
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # From policy search
        self.action_range = self.action_high - self.action_low

        self.w = np.random.normal(
            size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size))) # start producing actions in a decent range
        
        # Score tracker and learning parameters
        self.score = -np.inf
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1
        
        # counter
        self.count = 0
Code Example #16
File: agent.py Project: yokolet/DeepLearning
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 1.0  # discount factor (undiscounted here; 0.99 is the usual choice)
        self.tau = 0.001  # for soft update of target parameters

        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.001

        self.rotor_speed_min = 0
        self.rotor_speed_max = 600

        self.best_score = -np.inf
Code Example #17
    def setup(self, obs_spec, action_spec):
        self.obs_spec = obs_spec
        self.action_spec = action_spec

        # Placeholder: actions.ActionSpace is the enum class itself, not a
        # usable space object; it is replaced with the function count below.
        self.action_space = actions.ActionSpace
        print('setting up agent with action space:', self.action_space)

        self.w = np.random.normal(size=())

        # The number of distinct actions is large: 541 different action IDs
        self.action_space = len(self.action_spec.functions._func_list)

        # Use the feature_screen as the observation space to start
        # (maybe not all layers; dims are (17, 84, 84))
        self.observation_space = self.obs_spec.feature_screen

        print('observation space (feature_screen):',
              self.observation_space)

        self.local_actor = Actor(state_size=self.observation_space,
                                 action_size=self.action_space,
                                 action_low=0,
                                 action_high=541)
Code Example #18
    def __init__(self, task):
        """Initialize DDPG Agent instance."""
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_high = task.action_high
        self.action_low = task.action_low

        # Initializing local and target Actor Models
        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_high, self.action_low)

        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_high, self.action_low)

        # Initializing local and target Critic Models
        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay Memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # Additional Parameters
        self.best_score = -np.inf
        self.total_reward = 0.0
        self.count = 0
        self.score = 0
Code Example #19
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.3  # Y.W.: changed sigma (from 0.2)
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000  # Y.W.: extended from 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # Y.W.: reduced from 0.01; for soft update of target parameters

        # simple reward cache
        self.total_reward = 0.0
        self.best_total_reward = -np.inf
Code Example #20
    def __init__(self, task, name, loadfile=False):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.name = name
        if loadfile:
            self.actor_local.model.load_weights("./weights/" + name +
                                                "_actor.h5")
            self.critic_local.model.load_weights("./weights/" + name +
                                                 "_critic.h5")

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15  # original 0.15; 0.3 also tried
        self.exploration_sigma = 0.3   # original 0.3

        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
Code Example #21
    def __init__(self, env):
        self.env = env
        self.state_size = self.env.observation_space.shape[0]
        self.action_size = self.env.action_space.shape[0]
        self.action_low = self.env.action_space.low[0]
        self.action_high = self.env.action_space.high[0]

        # Learning rates
        self.actor_learning_rate = 1e-4
        self.critic_learning_rate = 1e-3

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high,
                                 self.actor_learning_rate)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high,
                                  self.actor_learning_rate)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.critic_learning_rate)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.critic_learning_rate)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.1
        self.exploration_sigma = 0.1
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
Code Example #22
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low
        self.score = 0

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.action_low, self.action_high)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.action_low, self.action_high)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000  # taken from the paper; changed from 10000 originally
        self.batch_size = 64  # taken from the paper
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.005  # soft-update rate for target parameters; taken from the paper (changed from 0.01)

        # Reset the episode when model set up
        self.reset_episode()
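
Code Examples #22 and #23 end by calling self.reset_episode(), which is not shown. In the Udacity starter code this pattern comes from, it is roughly the following (a sketch, assuming the task exposes reset()):

    def reset_episode(self):
        self.noise.reset()         # restart the OU process at its mean
        state = self.task.reset()  # get the initial state of a new episode
        self.last_state = state
        return state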
Code Example #23
    def __init__(self, task):
        # Task (environment) information
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        self.w = np.random.normal(
            size=(
                self.state_size, self.action_size
            ),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size)
                   ))  # start producing actions in a decent range

        self.actor = Actor(self.state_size, self.action_size, self.action_low,
                           self.action_high)
        self.critic = Critic(self.state_size, self.action_size)

        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.gamma = 0.95
        self.tau = 0.001

        self.best_w = None
        self.best_score = -np.inf

        self.exploration_mu = 0.5
        self.exploration_theta = 0.2
        self.exploration_sigma = 0.4
        self.noise = Noise(self.action_size, self.exploration_mu,
                           self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 100000
        self.batch_size = 32
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.num_steps = 0

        # Episode variables
        self.reset_episode()
Code Example #24
File: agent.py Project: jthoms/RL-Quadcopter-2
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.001
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        # tuning gamma between (0.95 - 0.99)
        self.gamma = 0.99  # discount factor
        # tuning tau around (0.001 - 0.01)
        self.tau = 0.005  # for soft update of target parameters

        self.best_score = -np.inf
        self.score = 0
        self.step_count = 0
Code Example #25
    def __init__(self, task, exp_mu, exp_theta, exp_sigma, gamma, tau):
        self.task = task

        self.s_size = task.s_size
        self.a_size = task.a_size

        self.a_low = task.a_low
        self.a_high = task.a_high

        # Actor Model
        self.actor_local = Actor(self.s_size, self.a_size, self.a_low,
                                 self.a_high)
        self.actor_target = Actor(self.s_size, self.a_size, self.a_low,
                                  self.a_high)

        # Critic Model
        self.critic_local = Critic(self.s_size, self.a_size)
        self.critic_target = Critic(self.s_size, self.a_size)

        # Initialize target model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # initialize noise
        self.exp_mu = exp_mu
        self.exp_theta = exp_theta
        self.exp_sigma = exp_sigma
        self.noise = OUNoise(self.a_size, self.exp_mu, self.exp_theta,
                             self.exp_sigma)

        # For Replay buffer
        self.buff_size = 1024 * 1024
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buff_size, self.batch_size)

        # discount factor
        self.gamma = gamma

        # for soft update of target parameters
        self.tau = tau
Code Example #26
    def __init__(self, task):
        """Initialize models"""
        self.env = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_high = task.action_high
        self.action_low = task.action_low

        # Initialize Actor (policy) models
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Initialize Critic (value) models
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay buffer
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.9  # discount factor
        self.tau = 0.001  # for soft update of target parameters
Code Example #27
    def __init__(self, task, lra, lrc, db):
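        # lra / lrc: learning rates for the actor / critic models; db is passed
        # through to PrioritizedReplayBuffer below (its exact meaning is an
        # assumption here, likely a prioritization parameter)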
        self.task = task
        self.s_sz = task.state_size
        self.a_sz = task.action_size
        self.a_max = task.max_action

        # Actor (Policy) Model
        self.actor_local = Actor(self.s_sz, self.a_sz, lra)
        self.actor_target = Actor(self.s_sz, self.a_sz, lra)

        # First Critic (Value) Model
        self.critic_local_1 = Critic(self.s_sz, self.a_sz, lrc)
        self.critic_target_1 = Critic(self.s_sz, self.a_sz, lrc)

        # Second Critic (Value) Model
        self.critic_local_2 = Critic(self.s_sz, self.a_sz, lrc)
        self.critic_target_2 = Critic(self.s_sz, self.a_sz, lrc)

        # Initialize target model parameters with local model parameters
        self.critic_target_1.model.set_weights(
            self.critic_local_1.model.get_weights())
        self.critic_target_2.model.set_weights(
            self.critic_local_2.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = GaussianNoise(self.a_sz)

        # Replay memory
        self.num_exp = 0
        self.batch = 32
        self.buffer = 10000
        labels = ["state", "action", "reward", "next_state", "done"]
        self.experience = namedtuple("Experience", field_names=labels)
        self.memory = PrioritizedReplayBuffer(self.buffer, self.batch, db)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.005  # for soft update of target parameters
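
The two critics in Code Example #27 follow the clipped double-Q idea from TD3. A sketch of how the twin target critics would typically be combined when forming the TD target (an assumption; the project's learn step is not shown, and numpy as np is assumed):

        # use the smaller of the two target-critic estimates to curb overestimation
        q1 = self.critic_target_1.model.predict([next_states, next_actions])
        q2 = self.critic_target_2.model.predict([next_states, next_actions])
        q_targets = rewards + self.gamma * np.minimum(q1, q2) * (1 - dones)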
Code Example #28
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.001

        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.gamma = 0.99
        self.tau = 0.1
        self.learning_rate = 0.0005

        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 learning_rate=self.learning_rate)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  learning_rate=self.learning_rate)

        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   learning_rate=self.learning_rate)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    learning_rate=self.learning_rate)
Code Example #29
    def __init__(self):
        self.reward = 0
        self.episodes = 0
        self.steps = 0
        self.obs_spec = None
        self.action_spec = None
        self.w = None
        self.action_space = 0
        self.observation_space = 0

        self.local_actor = None  # models are built later in create_models()
        self.target_actor = None
        self.local_critic = None
        self.target_critic = None

    def create_models(self,
                      hidden_sizes_actor=(512, 256),
                      hidden_sizes_critic=(512, 256, 256)):
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 hidden_sizes=hidden_sizes_actor)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  hidden_sizes=hidden_sizes_actor)
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   hidden_sizes=hidden_sizes_critic)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    hidden_sizes=hidden_sizes_critic)
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())