def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.3
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters
def __init__(self, cfg):
    # Environment configuration
    self.action_shape = cfg['env']['action_shape']

    # Replay memory
    cfg['agent']['memory']['action_shape'] = self.action_shape
    self.memory = ReplayBuffer(**cfg['agent']['memory'])

    # Algorithm parameters
    self.exploration_mu, self.exploration_sigma = cfg['agent']['noise']
    self.gamma = cfg['agent']['gamma']
    self.tau = cfg['agent']['tau']

    state_flatten_shape = [np.prod(self.memory.flatten_state_shape)]

    # Actor Model
    self.actor = Actor(state_flatten_shape, self.action_shape, cfg['env']['action_range'],
                       self.tau, self.memory.batch_size, cfg['actor'])

    # Critic Model
    self.critic = Critic(state_flatten_shape, self.action_shape, self.tau, cfg['critic'])

    # Flag & Counter
    self.training = True
    self.episode = 0
    self.max_episode_explore = cfg['agent']['explore']
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.2
    self.exploration_sigma = 0.33
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.9    # discount factor
    self.tau = 0.0015   # for soft update of target parameters
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.10   # same direction
    self.exploration_sigma = 0.001  # random noise
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.90  # discount factor
    self.tau = 0.1     # for soft update of target parameters

    # Score tracker
    self.best_score = -np.inf
    self.score = 0
def __init__(self, task, gamma=0.99, tau=0.01, buffer_size=100000, batch_size=64):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0        # mean/equilibrium value
    self.exploration_theta = 0.15  # how fast the variable returns towards the mean (tried 0.6, 0.15)
    self.exploration_sigma = 0.2   # volatility of the process (tried 0.3, 0.2)
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = gamma
    self.tau = tau
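# A minimal sketch (not part of any snippet above) of the OUNoise helper that
# these constructors assume: an Ornstein-Uhlenbeck process where mu is the
# long-run mean, theta controls how strongly the state is pulled back toward mu,
# and sigma scales the random perturbation. The (size, mu, theta, sigma)
# signature mirrors the calls above; the defaults and the actual project class
# may differ.
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = np.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new state as a noise sample."""
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state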
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Create the actor instances for local and target
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Create the critic instances for local and target
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load the local weights into the target models
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise hyperparameters
    self.exploration_mu = 0
    self.exploration_theta = 0.35
    self.exploration_sigma = 0.1
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Set the replay memory
    self.buffer_size = 100000
    self.batch_size = 32
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Update function hyperparameters
    self.gamma = 0.99
    self.tau = 0.001
def __init__(self, task, buffer_size=100000, batch_size=64, gamma=0.99, tau=0.01):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                             self.action_high, learning_rate=1e-3)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                              self.action_high, learning_rate=1e-3)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, learning_rate=1e-4)
    self.critic_target = Critic(self.state_size, self.action_size, learning_rate=1e-4)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(size=self.action_size)

    # Replay memory
    self.buffer_size = buffer_size
    self.batch_size = batch_size  # 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = gamma  # 0.99, discount factor
    self.tau = tau      # 0.01, for soft update of target parameters

    # Initialize
    self.last_state = None
    self.total_reward = 0.0

    # Score tracker and learning parameters
    self.score = 0
    self.best_score = -np.inf
    self.count = 0
def __init__(self, task, train=True):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Learning rates suggested by the paper:
    # https://pdfs.semanticscholar.org/71f2/03de1a53deae81a7707143f0ed564661e279.pdf
    self.actor_learning_rate = 0.001
    self.actor_decay = 0.0
    self.critic_learning_rate = 0.001
    self.critic_decay = 0.0

    # Actor Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                             self.actor_learning_rate, self.actor_decay)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                              self.actor_learning_rate, self.actor_decay)

    # Critic Model
    self.critic_local = Critic(self.state_size, self.action_size, self.critic_learning_rate, self.critic_decay)
    self.critic_target = Critic(self.state_size, self.action_size, self.critic_learning_rate, self.critic_decay)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    # self.exploration_theta = 0.15
    # self.exploration_sigma = 0.2
    self.exploration_theta = 0.01
    self.exploration_sigma = 0.02
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Score tracker
    self.best_w = None
    self.best_score = -np.inf
    # self.noise_scale = 0.7
    self.score = 0

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters

    # Indicate whether we want to learn (or only predict without learning)
    self.set_train(train)
def __init__(self, task, seed=None, render=False):
    self.env = task.env
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    self.total_reward = 0
    self.steps = 0
    self.action_repeat = 3
    self.render = render

    # Score tracker and learning parameters
    self.score = -np.inf
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Counter
    self.count = 0

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(1, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters
def __init__(self, task, actor_learning_rate=0.001, critic_learning_rate=0.001,
             tau=0.01, gamma=0.99, buffer_size=100000, batch_size=64,
             exploration_mu=0, exploration_theta=0.15, exploration_sigma=0.2):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                             self.action_high, actor_learning_rate)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                              self.action_high, actor_learning_rate)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, critic_learning_rate)
    self.critic_target = Critic(self.state_size, self.action_size, critic_learning_rate)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = exploration_mu
    self.exploration_theta = exploration_theta
    self.exploration_sigma = exploration_sigma
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = gamma  # discount factor
    self.tau = tau      # for soft update of target parameters
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    # self.exploration_mu = 0.0001
    # self.exploration_theta = 0.2
    # self.exploration_sigma = 0.25
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters
    # self.tau = 0.005

    self.closeCount = 0

    # Score tracker
    self.best_score = -np.inf
    self.total_reward = 0.0
    self.currentclose = 0
def __init__(self, task, sess):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high
    self.action_range = self.action_high - self.action_low

    # Algorithm parameters
    self.gamma = 0.9  # discount factor
    self.tau = 2e-3   # for soft update of target parameters
    self.actor_lr = 2e-3
    self.critic_lr = 2e-3

    self.reward_variance = RunningVariance(1)
    self.q_values_variance = RunningVariance(1)

    # Actor (Policy) Model
    self.actor_local = Actor(sess, self.state_size, self.action_size, self.action_low,
                             self.action_high, self.actor_lr)
    self.actor_target = Actor(sess, self.state_size, self.action_size, self.action_low,
                              self.action_high, self.actor_lr)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, self.critic_lr)
    self.critic_target = Critic(self.state_size, self.action_size, self.critic_lr)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.8
    self.exploration_sigma = 0.05 * self.action_range
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 100
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    actor_local_params = actor_params()
    actor_target_params = actor_params()
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                             self.action_high, params=actor_local_params)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                              self.action_high, params=actor_target_params)

    # Critic (Value) Model
    critic_local_params = critic_params()
    critic_target_params = critic_params()
    self.critic_local = Critic(self.state_size, self.action_size, params=critic_local_params)
    self.critic_target = Critic(self.state_size, self.action_size, params=critic_target_params)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    agent_par = agent_params(self.action_size)

    # Noise process
    self.noise = agent_par.noise

    # Replay memory
    self.batch_size = agent_par.batch_size
    self.memory = agent_par.memory

    # Algorithm parameters
    self.gamma = agent_par.gamma  # discount factor
    self.tau = agent_par.tau      # for soft update of target parameters

    # Compute the ongoing top score
    self.top_score = -np.inf
    self.score = 0
def __init__(self, task, verbose=False):
    self.verbose = verbose
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Optional TensorBoard logging (disabled)
    # log_path = '/tmp/logs'
    # self.callback = callbacks.TensorBoard(log_dir=log_path, histogram_freq=1,
    #                                       write_images=False, write_grads=True, write_graph=False)
    # self.callback.set_model(self.critic_local.model)
    # self.writer = tf.summary.FileWriter(log_path)
    # self.learn_counter = 0

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0.1
    self.exploration_theta = 0.2
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 512
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.015   # for soft update of target parameters
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters

    # From policy search
    self.action_range = self.action_high - self.action_low
    # Weights for a simple linear policy (state_space x action_space),
    # scaled to start producing actions in a decent range
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),
        scale=(self.action_range / (2 * self.state_size)))

    # Score tracker and learning parameters
    self.score = -np.inf
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Counter
    self.count = 0
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    # self.gamma = 0.99  # discount factor
    self.gamma = 1.0
    # self.tau = 0.01    # for soft update of target parameters
    self.tau = 0.001

    # Exploration epsilon and decay schedule
    self.epsilon = 1.0
    self.epsilon_decay = 0.995
    self.epsilon_min = 0.001

    # Rotor speed limits
    self.rotor_speed_min = 0
    self.rotor_speed_max = 600

    self.best_score = -np.inf
def setup(self, obs_spec, action_spec):
    self.obs_spec = obs_spec
    self.action_spec = action_spec

    # can't do this
    self.action_space = actions.ActionSpace
    print('setting up agent with action space:', self.action_space)

    self.w = np.random.normal(size=())

    # number of different actions is pretty large: 541 different action ids
    self.action_space = len(self.action_spec.functions._func_list)

    # let's try to make the observation_space the feature_screen as a start,
    # maybe not all of them; dims are (17, 84, 84)
    self.observation_space = self.obs_spec.feature_screen
    # observation_space = obs_spec.player_relative
    print('this is the obs_space for player_relative:', self.observation_space)
    print('made it!')

    self.local_actor = Actor(state_size=self.observation_space,
                             action_size=self.action_space,
                             action_low=0, action_high=541)
def __init__(self, task):
    """Initialize DDPG Agent instance."""
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_high = task.action_high
    self.action_low = task.action_low

    # Initialize local and target Actor (Policy) Models
    self.actor_local = Actor(self.state_size, self.action_size, self.action_high, self.action_low)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_high, self.action_low)

    # Initialize local and target Critic (Value) Models
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay Memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters

    # Additional Parameters
    self.best_score = -np.inf
    self.total_reward = 0.0
    self.count = 0
    self.score = 0
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    # Y.W. changing sigma
    self.exploration_sigma = 0.3  # tried 0.3, 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    # Y.W. extending buffer_size
    self.buffer_size = 1000000  # was 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    # self.tau = 0.01  # for soft update of target parameters
    # Y.W.
    self.tau = 0.001

    # Simple reward cache
    self.total_reward = 0.0
    self.best_total_reward = -np.inf
def __init__(self, task, name, loadfile=False):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    self.name = name
    if loadfile:
        self.actor_local.model.load_weights("./weights/" + name + "_actor.h5")
        self.critic_local.model.load_weights("./weights/" + name + "_critic.h5")

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15  # 0.3; original 0.15
    self.exploration_sigma = 0.3   # 0.3; original 0.3
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters
def __init__(self, env):
    self.env = env
    self.state_size = self.env.observation_space.shape[0]
    self.action_size = self.env.action_space.shape[0]
    self.action_low = self.env.action_space.low[0]
    self.action_high = self.env.action_space.high[0]

    # Learning rates
    self.actor_learning_rate = 1e-4
    self.critic_learning_rate = 1e-3

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                             self.action_high, self.actor_learning_rate)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                              self.action_high, self.actor_learning_rate)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, self.critic_learning_rate)
    self.critic_target = Critic(self.state_size, self.action_size, self.critic_learning_rate)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.1
    self.exploration_sigma = 0.1
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high
    self.action_range = self.action_high - self.action_low
    self.score = 0

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, self.action_low, self.action_high)
    self.critic_target = Critic(self.state_size, self.action_size, self.action_low, self.action_high)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000  # taken from paper; changed from 10000 originally
    self.batch_size = 64       # taken from paper
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.005   # for soft update of target parameters; taken from paper, changed from 0.01 originally

    # Reset the episode when the model is set up
    self.reset_episode()
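# A minimal sketch (assumed, not taken from the snippets above) of the soft-update
# step that the tau values above parameterize. After each learning step the target
# network weights are moved a fraction tau toward the local network weights, which
# keeps the learning targets slowly moving and stabilizes training. Assumes
# Keras-style models exposing get_weights()/set_weights(), as used above.
def soft_update(local_model, target_model, tau):
    """Blend local weights into target weights: target = tau*local + (1 - tau)*target."""
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_model.get_weights(), target_model.get_weights())]
    target_model.set_weights(new_weights)

# Hypothetical call sites inside a learn() step:
# soft_update(self.critic_local.model, self.critic_target.model, self.tau)
# soft_update(self.actor_local.model, self.actor_target.model, self.tau)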
def __init__(self, task):
    # Task (environment) information
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high
    self.action_range = self.action_high - self.action_low

    # Weights for a simple linear policy (state_space x action_space),
    # scaled to start producing actions in a decent range
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),
        scale=(self.action_range / (2 * self.state_size)))

    # Actor (Policy) and Critic (Value) Models
    self.actor = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.critic = Critic(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Algorithm parameters
    self.gamma = 0.95
    self.tau = 0.001

    # Score tracker
    self.best_w = None
    self.best_score = -np.inf

    # Noise process
    self.exploration_mu = 0.5
    self.exploration_theta = 0.2
    self.exploration_sigma = 0.4
    self.noise = Noise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 32
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    self.num_steps = 0

    # Episode variables
    self.reset_episode()
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.001
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    # tuning gamma between (0.95 - 0.99)
    self.gamma = 0.99  # discount factor
    # tuning tau around (0.001 - 0.01)
    self.tau = 0.005   # for soft update of target parameters

    self.best_score = -np.inf
    self.score = 0
    self.step_count = 0
def __init__(self, task, exp_mu, exp_theta, exp_sigma, gamma, tau):
    self.task = task
    self.s_size = task.s_size
    self.a_size = task.a_size
    self.a_low = task.a_low
    self.a_high = task.a_high

    # Actor Model
    self.actor_local = Actor(self.s_size, self.a_size, self.a_low, self.a_high)
    self.actor_target = Actor(self.s_size, self.a_size, self.a_low, self.a_high)

    # Critic Model
    self.critic_local = Critic(self.s_size, self.a_size)
    self.critic_target = Critic(self.s_size, self.a_size)

    # Initialize target model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Initialize noise
    self.exp_mu = exp_mu
    self.exp_theta = exp_theta
    self.exp_sigma = exp_sigma
    self.noise = OUNoise(self.a_size, self.exp_mu, self.exp_theta, self.exp_sigma)

    # Replay buffer
    self.buff_size = 1024 * 1024
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buff_size, self.batch_size)

    # Discount factor
    self.gamma = gamma
    # For soft update of target parameters
    self.tau = tau
def __init__(self, task):
    """Initialize models."""
    self.env = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_high = task.action_high
    self.action_low = task.action_low

    # Initialize Actor (Policy) models
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Initialize Critic (Value) models
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Replay buffer
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.9   # discount factor
    self.tau = 0.001   # for soft update of target parameters
def __init__(self, task, lra, lrc, db):
    self.task = task
    self.s_sz = task.state_size
    self.a_sz = task.action_size
    self.a_max = task.max_action

    # Actor (Policy) Model
    self.actor_local = Actor(self.s_sz, self.a_sz, lra)
    self.actor_target = Actor(self.s_sz, self.a_sz, lra)

    # First Critic (Value) Model
    self.critic_local_1 = Critic(self.s_sz, self.a_sz, lrc)
    self.critic_target_1 = Critic(self.s_sz, self.a_sz, lrc)

    # Second Critic (Value) Model
    self.critic_local_2 = Critic(self.s_sz, self.a_sz, lrc)
    self.critic_target_2 = Critic(self.s_sz, self.a_sz, lrc)

    # Initialize target model parameters with local model parameters
    self.critic_target_1.model.set_weights(self.critic_local_1.model.get_weights())
    self.critic_target_2.model.set_weights(self.critic_local_2.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = GaussianNoise(self.a_sz)

    # Replay memory
    self.num_exp = 0
    self.batch = 32
    self.buffer = 10000
    labels = ["state", "action", "reward", "next_state", "done"]
    self.experience = namedtuple("Experience", field_names=labels)
    self.memory = PrioritizedReplayBuffer(self.buffer, self.batch, db)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.005   # for soft update of target parameters
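# A minimal sketch of the GaussianNoise helper assumed by the twin-critic variant
# above: uncorrelated zero-mean Gaussian noise per action dimension. The single
# size argument matches GaussianNoise(self.a_sz); the sigma default is an
# assumption, not taken from the original code.
import numpy as np

class GaussianNoise:
    """Zero-mean Gaussian exploration noise."""

    def __init__(self, size, sigma=0.1):
        self.size = size
        self.sigma = sigma

    def sample(self):
        """Return one noise vector with independent N(0, sigma^2) components."""
        return self.sigma * np.random.randn(self.size)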
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Noise process parameters
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.001

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.1     # for soft update of target parameters
    self.learning_rate = 0.0005

    self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                             learning_rate=self.learning_rate)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                              learning_rate=self.learning_rate)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, learning_rate=self.learning_rate)
    self.critic_target = Critic(self.state_size, self.action_size, learning_rate=self.learning_rate)
def __init__(self):
    # Episode bookkeeping
    self.reward = 0
    self.episodes = 0
    self.steps = 0

    # Spec placeholders (set later in setup)
    self.obs_spec = None
    self.action_spec = None
    self.w = None
    self.action_space = 0
    self.observation_space = 0

    # Actor/Critic placeholders
    self.local_actor = Actor()
    self.target_actor = None
    self.local_critic = None
    self.target_critic = None
def create_models(self, hidden_sizes_actor=(512, 256), hidden_sizes_critic=(512, 256, 256)):
    # Actor (Policy) Model: local and target networks
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                             hidden_sizes=hidden_sizes_actor)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high,
                              hidden_sizes=hidden_sizes_actor)
    # Initialize target model parameters with local model parameters
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Critic (Value) Model: local and target networks
    self.critic_local = Critic(self.state_size, self.action_size, hidden_sizes=hidden_sizes_critic)
    self.critic_target = Critic(self.state_size, self.action_size, hidden_sizes=hidden_sizes_critic)
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
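# A minimal sketch of the ReplayBuffer(buffer_size, batch_size) helper used by
# most of the constructors above: a fixed-size deque of experience tuples with
# uniform random sampling. Field and method names are assumptions based on
# common DDPG implementations; the actual project class may differ.
import random
from collections import deque, namedtuple

class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples mini-batches."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)  # oldest experiences are dropped first
        self.batch_size = batch_size
        self.experience = namedtuple("Experience",
                                     field_names=["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to memory."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Randomly sample a batch of experiences from memory."""
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        """Return the current number of stored experiences."""
        return len(self.memory)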