def __init__(self, task):
    """Set up the Q lookup table, episode counters and three CSV log paths.

    :param task: Task (environment) object; ``task.action_space.shape`` is
        read here to size the ``acts`` buffer.
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.action_space = 3
    self.acts = np.zeros(shape=self.task.action_space.shape)
    # Keys not seen before map to a fresh zero vector of length
    # ``self.action_space`` (evaluated lazily, at lookup time).
    self.Q = defaultdict(lambda: np.zeros(self.action_space))

    # Episode variables
    self.reset_episode_vars()
    self.episode_num = 1
    self.step_count = 20

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    stats_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, stats_name)
    print("Saving stats to {}".format(self.stats_filename))  # [debug]

    # Q stats: path to CSV file
    out_dir = util.get_param('out')
    q_stats_name = "q_stats_{}.csv".format(util.get_timestamp())
    self.q_stats_filename = os.path.join(out_dir, q_stats_name)
    print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

    # State-action stats: path to CSV file
    out_dir = util.get_param('out')
    sa_stats_name = "state_action_{}.csv".format(util.get_timestamp())
    self.sa_stats_filename = os.path.join(out_dir, sa_stats_name)
    print("Saving states actions to {}".format(
        self.sa_stats_filename))  # [debug]
def __init__(self, task):
    """Build actor/critic model pairs, noise, replay memory and log paths.

    :param task: Task object; ``task.action_space.shape`` is read here to
        size the ``acts`` buffer.
    """
    # Task, state and action dimensions
    self.task = task  # should contain observation_space and action_space
    self.state_size = 7
    self.action_size = 1
    n_states, n_actions = self.state_size, self.action_size

    # Actor (Policy) Model
    self.acts = np.zeros(shape=self.task.action_space.shape)
    self.actor_local = Actor(n_states, n_actions)
    self.actor_target = Actor(n_states, n_actions)

    # Critic (Value) Model
    self.critic_local = Critic(n_states, n_actions)
    self.critic_target = Critic(n_states, n_actions)

    # Target models start out as exact copies of the local models.
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Noise process
    self.noise = OUNoise(n_actions)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.005  # for soft update of target parameters

    self.count = 0
    self.reset_episode_vars()
    self.epsilon = 1
    self.episode_num = 1

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    stats_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, stats_name)
    print("Saving stats to {}".format(self.stats_filename))  # [debug]

    # Q stats: path to CSV file
    out_dir = util.get_param('out')
    q_stats_name = "q_stats_{}.csv".format(util.get_timestamp())
    self.q_stats_filename = os.path.join(out_dir, q_stats_name)
    print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

    # State-action stats: path to CSV file
    out_dir = util.get_param('out')
    sa_stats_name = "state_action_{}.csv".format(util.get_timestamp())
    self.sa_stats_filename = os.path.join(out_dir, sa_stats_name)
    print("Saving states actions to {}".format(
        self.sa_stats_filename))  # [debug]
def __init__(self, task):
    '''Initialize policy and other agent parameters.

    Prepares the statistics output directory and CSV path, and the models
    directory with the paths of the best actor/critic model files.

    :param task: Should be able to access the following (OpenAI Gym spaces):
        task.observation_space  # i.e. state space
        task.action_space
    :raises OSError: if an output directory cannot be created.
    '''

    def ensure_dir(path):
        # Create the directory unconditionally and only re-raise when it
        # still does not exist; this avoids the exists()/makedirs() race.
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    # init statistics writing
    self.stats_dir = util.get_param('out')
    ensure_dir(self.stats_dir)
    self.stats_filename = os.path.join(
        self.stats_dir,
        'stats_{}.csv'.format(util.get_timestamp())
    )
    self.stats_columns = ['episode', 'total_reward']
    print('Saving statistics {} to {}'.format(self.stats_columns,
                                              self.stats_filename))

    # init models writing
    self.models_dir = util.get_param('models')
    # Built with os.path.join, like the stats path above.
    self.actor_best_model_file = os.path.join(self.models_dir,
                                              'actor_best.pth')
    self.critic_best_model_file = os.path.join(self.models_dir,
                                               'critic_best.pth')
    ensure_dir(self.models_dir)
def __init__(self, task):
    """Create the three sub-task agents and select the first as current.

    :param task: Task object stored on ``self.task``. The sub-agents are
        built on freshly constructed ``TakeoffB``/``HoverB``/``LandB``
        tasks, not on this object.
    """
    # ---------------------------------------
    # Saving data
    self.stats_filename = os.path.join(
        util.get_param('out'), 'task04',
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))

    self.task = task
    self.task_takeoff = takeoff_b.TakeoffB()
    self.task_hover = hover_b.HoverB()
    self.task_land = land_b.LandB()

    # One agent per sub-task
    self.o_task01_agent = task01_ddpg_agent_b.Task01_DDPG(
        self.task_takeoff)
    self.o_task02_agent = task02_ddpg_agent_b.Task02_DDPG(self.task_hover)
    self.o_task03_agent = task03_ddpg_agent_b.Task03_DDPG(self.task_land)

    # Current agent
    self.o_current_agent = self.o_task01_agent
    self.mode = 0

    # The original set episode_num to 1 earlier and then overwrote it
    # here; only this final value was ever observable.
    self.episode_num = 0
    self.total_reward = 0.0
def __init__(self, task):
    """Initialise a linear policy sized from the task's spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` with ``shape``, ``low`` and ``high``.
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    self.state_size = np.prod(obs_space.shape)
    self.state_range = obs_space.high - obs_space.low
    act_space = self.task.action_space
    self.action_size = np.prod(act_space.shape)
    self.action_range = act_space.high - act_space.low

    # Policy parameters: weights for a simple linear policy
    # (state_space x action_space), scaled per action so that initial
    # actions start in a decent range.
    weight_shape = (self.state_size, self.action_size)
    weight_scale = (self.action_range / (2 * self.state_size)).reshape(1, -1)
    self.w = np.random.normal(size=weight_shape, scale=weight_scale)

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "dummy_stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.episode_num = 1
def __init__(self, task):
    """Initialise a linear policy over fixed 3-dimensional spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` with ``low`` and ``high``.
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    act_space = self.task.action_space

    # Sizes are fixed at 3 rather than derived from the space shapes.
    self.state_size = 3
    self.state_range = obs_space.high - obs_space.low
    self.action_size = 3
    self.action_range = act_space.high - act_space.low

    # Policy parameters: weights for a simple linear policy
    # (state_space x action_space); only the first three action ranges
    # are used for the scale so that it matches action_size.
    weight_shape = (self.state_size, self.action_size)
    weight_scale = (self.action_range[:3] /
                    (2 * self.state_size)).reshape(1, -1)
    self.w = np.random.normal(size=weight_shape, scale=weight_scale)

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file.
    # NOTE(review): the file name embeds str(self.task) and str(self);
    # unless those classes define __str__ this yields default object
    # reprs in the file name -- confirm this is intended.
    out_dir = util.get_param('out')
    csv_name = '{}_{}_stats_{}.csv'.format(self.task, self,
                                           util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print('Saving stats {} to {}'.format(self.stats_columns,
                                         self.stats_filename))  # [debug]
def save_episode_stats(self):
    """Choose a new timestamped stats CSV path and reset the episode counter.

    Sets ``stats_filename``, ``stats_columns`` and ``episode_num`` and
    prints the chosen path; nothing is written to disk here.
    """
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    message = "### Saving stats {} to {}".format(self.stats_columns,
                                                 self.stats_filename)
    print(message)
def __init__(self, task):
    """Build actor/critic models on constrained 6-state / 3-action spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    self.task = task

    # Constrain State and Action matrices
    self.state_size = 6
    self.action_size = 3
    # For debugging:
    print(
        "Constrained State {} and Action {}; Original State {} and Action {}"
        .format(self.state_size, self.action_size,
                self.task.observation_space.shape,
                self.task.action_space.shape))

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Save episode statistics for analysis
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("Save Stats {} to {}".format(self.stats_columns,
                                       self.stats_filename))

    # Actor Model: bounds are the first three action components only.
    self.action_low = self.task.action_space.low[:3]
    self.action_high = self.task.action_space.high[:3]
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Target models start out as copies of the local models.
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Process noise
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(size=self.buffer_size)

    # Algorithm Parameters
    self.gamma = 0.99  # discount
    self.tau = 0.001  # soft update of targets

    # Episode vars
    self.reset_episode_vars()
def __init__(self, task):
    """Build actor/critic models and a linear policy on the full spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    self.state_size = np.prod(obs_space.shape)
    self.state_range = obs_space.high - obs_space.low
    act_space = self.task.action_space
    self.action_size = np.prod(act_space.shape)
    self.action_range = act_space.high - act_space.low

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low
    self.action_high = self.task.action_space.high
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 248
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # Policy parameters: weights for a simple linear policy
    # (state_space x action_space), scaled to start producing actions in
    # a decent range.
    weight_shape = (self.state_size, self.action_size)
    weight_scale = (self.action_range / (2 * self.state_size)).reshape(1, -1)
    self.w = np.random.normal(size=weight_shape, scale=weight_scale)

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Episode variables
    # NOTE(review): second reset call in this constructor; kept because
    # reset_episode_vars() is not visible here -- confirm it is redundant.
    self.reset_episode_vars()
def __init__(self, task):
    """Build actor/critic models, loading saved local weights if present.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.state_size = 3
    self.action_size = 3
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Weight file locations
    self.actor_weights = os.path.join(util.get_param('out'),
                                      "actor_weights.h5")
    self.critic_weights = os.path.join(util.get_param('out'),
                                       "critic_weights.h5")

    # Actor (Policy) Model
    # NOTE(review): the action bounds are passed through
    # preprocess_state(); presumably it slices to the constrained
    # dimensions -- verify against that method.
    self.action_low = self.preprocess_state(self.task.action_space.low)
    self.action_high = self.preprocess_state(self.task.action_space.high)
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize local model parameters with loaded weights
    for weights_path, local_net in ((self.critic_weights, self.critic_local),
                                    (self.actor_weights, self.actor_local)):
        if os.path.isfile(weights_path):
            local_net.model.load_weights(weights_path)

    # Initialize target model parameters with local model parameters
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]
def __init__(self):
    """Choose the stats CSV path, its columns, and start the episode count."""
    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)

    # Columns to save
    self.stats_columns = [
        'episode',
        'height',
        'target_distance',
        'total_reward',
    ]
    self.episode_num = 1

    message = "Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename)
    print(message)  # [debug]
def __init__(self, task):
    """Build actor/critic models on 3-dimensional state and action spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``low``, ``high``).
    """
    self.task = task
    obs_space = self.task.observation_space
    act_space = self.task.action_space

    self.state_size = 3  # position only
    self.state_range = obs_space.high - obs_space.low
    self.action_size = 3  # force only
    n_act = self.action_size
    self.action_range = (act_space.high - act_space.low)[:n_act]

    # Actor (Policy) model: bounds restricted to the first action_size
    # components.
    self.action_low = self.task.action_space.low[:n_act]
    self.action_high = self.task.action_space.high[:n_act]
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Value) model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.5  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))
def __init__(self, task):
    """Set up replay memory, noise, the A2C model and episode bookkeeping.

    :param task: Task object exposing ``action_space`` (``shape``,
        ``low``, ``high``).
    """
    self.task = task  # should contain observation_space and action_space
    self.state_shape = (9, )
    self.action_shape = (1, )
    self.nb_actions = np.prod(self.action_shape)
    # Range of the action component at index 2 only.
    act_space = self.task.action_space
    self.action_range = act_space.high[2] - act_space.low[2]

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size, self.action_shape,
                               self.state_shape)

    # Noise process
    self.noise = OUNoise(self.nb_actions)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.005
    self.actor_lr = 0.0001
    self.critic_lr = 0.001

    # Model construction and initialisation
    learning_rates = dict(actor_lr=self.actor_lr, critic_lr=self.critic_lr)
    self.a2c = A2C(self.state_shape, self.action_shape, gamma=self.gamma,
                   **learning_rates)
    self.initialize()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Initial episode variables
    self.last_state = None
    self.last_action = None
    self.total_reward = 0.0
    self.count = 0
    self.acts = np.zeros(shape=self.task.action_space.shape)
def __init__(self, task):
    """Set up a discrete-action agent restricted to component index 2.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``low``, ``high``).
    """
    self.task = task
    obs_space = self.task.observation_space
    act_space = self.task.action_space

    # Constrain state space to the component at index 2
    self.state_size = 1
    self.state_low = obs_space.low[2]
    self.state_high = obs_space.high[2]
    self.state_range = self.state_high - self.state_low

    # Only limit to z direction
    self.action_range = (act_space.high - act_space.low)[2]
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]

    # Discretise [10.0, action_high] with a step of 1/16 of that span;
    # the +0.1 on the stop value keeps the upper end in the arange.
    stepping = (self.action_high - 10.0) / 16.0
    upper = self.action_high + 0.1
    self.discrete_actions = np.arange(10.0, upper, stepping)
    self.action_size = len(self.discrete_actions)
    print('discrete action:', self.discrete_actions, ', action size: ',
          self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.9  # discount factor
    self.learning_rate = 0.001
    self.model = self.build_model()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))

    # Exploration schedule (attribute spelling kept as-is: other code may
    # read these names).
    self.epilson = 1.0
    self.epilson_decay = 0.96
    self.epilson_min = 0.05

    self.learning = True
    self.reset_episode_vars()
    self.best_reward = -99999
def __init__(self, task):
    """Build actor/critic networks on 3-dimensional state/action spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    self.task = task
    self.state_size = 3
    self.action_size = 3

    # Action space limits: first three components only
    self.action_low = self.task.action_space.low[:3]
    self.action_high = self.task.action_space.high[:3]

    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Actor takes size and bounds bundled in one list; the same list
    # object is handed to both actors.
    action = [self.action_size, self.action_low, self.action_high]

    # Initialize networks
    # Actor
    self.actor_local = Actor(self.state_size, action)
    self.actor_target = Actor(self.state_size, action)
    # Critic
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    self.setup_weights()

    # Noise
    self.noise = OUNoise(self.action_size)

    # Replay buffer
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size)

    # Hyper params
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # Log file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats = os.path.join(out_dir, csv_name)
    self.episode_no = 1
    self.stats_columns = ['episodes', 'total_reward']
    print("Saving stats {} to {}".format(self.stats_columns, self.stats))

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task, action_min, action_max, state_min, state_max):
    """Set up an agent constrained to index ranges of the task's spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    :param action_min: First action index used (inclusive).
    :param action_max: Last action index used (inclusive).
    :param state_min: First state index used (inclusive).
    :param state_max: Last state index used (inclusive).
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.min_action = action_min  # minimum and maximum action index
    self.max_action = action_max
    self.min_stat = state_min  # minimum and maximum state index
    self.max_stat = state_max
    # Defines if the agent shall only learn at the end of each episode
    self.learn_when_done = False

    # Constrain state and action spaces (both index bounds inclusive)
    self.state_size = self.max_stat - self.min_stat + 1  # position only
    self.action_size = self.max_action - self.min_action + 1  # force only
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # State space minimum and range
    state_slice = slice(self.min_stat, self.max_stat + 1)
    self.state_low = self.task.observation_space.low[state_slice]
    self.state_range = (self.task.observation_space.high[state_slice]
                        - self.state_low)

    # Action space minimum, maximum and range
    action_slice = slice(self.min_action, self.max_action + 1)
    self.action_low = self.task.action_space.low[action_slice]
    self.action_high = self.task.action_space.high[action_slice]
    self.action_range = self.action_high - self.action_low

    # Replay memory
    self.epsilon = 0.0
    self.batch_size = 64
    self.buffer_size = 100000
    self.memory = ReplayBuffer(self.buffer_size)

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    # Columns to save
    self.stats_columns = ['episode', 'total_reward', 'learning_rate']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task):
    """Initialise a linear policy sized from the task's full spaces.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    self.state_size = np.prod(obs_space.shape)
    self.state_range = obs_space.high - obs_space.low
    act_space = self.task.action_space
    self.action_size = np.prod(act_space.shape)
    self.action_range = act_space.high - act_space.low

    # Bounds for the first six action components
    self.action_low = self.task.action_space.low[:6]
    self.action_high = self.task.action_space.high[:6]

    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Policy parameters: weights for a simple linear policy
    # (state_space x action_space), scaled to start producing actions in
    # a decent range.
    weight_shape = (self.state_size, self.action_size)
    weight_scale = (self.action_range / (2 * self.state_size)).reshape(1, -1)
    self.w = np.random.normal(size=weight_shape, scale=weight_scale)

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "Hoverstats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Episode variables
    # NOTE(review): second reset call in this constructor; kept because
    # reset_episode_vars() is not visible here -- confirm it is redundant.
    self.reset_episode_vars()
def __init__(self, task):
    """Build single-dimension actor/critic models and load saved weights.

    Side effect: ``task.observation_space.high`` and ``.low`` are replaced
    in place by their ``[2:3]`` slices.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    act_space = self.task.action_space

    # Full ranges are computed first, before the observation bounds are
    # narrowed below.
    self.state_range = obs_space.high - obs_space.low
    self.action_range = act_space.high - act_space.low

    self.task.observation_space.high = self.task.observation_space.high[2:3]
    self.task.observation_space.low = self.task.observation_space.low[2:3]

    # Constrain state and action spaces
    self.state_size = 1  # position only
    self.action_size = 1  # force only
    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]

    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Actor (Policy) Model
    self.state_range = self.state_range[2:3]
    self.action_range = self.action_range[2:3]
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # ---------------------------------------
    # Saving data: path to CSV file
    stats_dir = util.get_param('out') + '/task01/'
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(stats_dir, csv_name)
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))

    # Load/save parameters
    self.load_weights = True  # try to load weights from saved models
    self.save_weights_every = 1  # save every n episodes, None to disable
    # A separate subdirectory per task and/or architecture can be used.
    self.model_dir = util.get_param('out') + '/task01'
    self.model_name = "my-model_" + util.get_timestamp()
    self.model_ext = ".h5"

    # Load pre-trained model weights, if available
    if self.load_weights:
        try:
            date_of_file = '2018-02-20_11-28-13'
            self.actor_filename_local = os.path.join(
                self.model_dir,
                'my-model_{}_actor_local.h5'.format(date_of_file))
            self.critic_filename_local = os.path.join(
                self.model_dir,
                'my-model_{}_critic_local.h5'.format(date_of_file))
            self.actor_filename_target = os.path.join(
                self.model_dir,
                'my-model_{}_actor_target.h5'.format(date_of_file))
            self.critic_filename_target = os.path.join(
                self.model_dir,
                'my-model_{}_critic_target.h5'.format(date_of_file))

            # Same load order as before: actor local, critic local,
            # actor target, critic target.
            for net, weights_file in (
                    (self.actor_local, self.actor_filename_local),
                    (self.critic_local, self.critic_filename_local),
                    (self.actor_target, self.actor_filename_target),
                    (self.critic_target, self.critic_filename_target)):
                net.model.load_weights(weights_file)

            print("Model weights loaded from file: {}, {}, {}, {}".format(
                self.actor_filename_local, self.critic_filename_local,
                self.actor_filename_target,
                self.critic_filename_target))  # [debug]
        except Exception as e:
            # Best effort: report the failure and continue with the
            # freshly initialised weights.
            print("Unable to load model weights from file: {}, {}, {}, {}".
                  format(self.actor_filename_local,
                         self.critic_filename_local,
                         self.actor_filename_target,
                         self.critic_filename_target))
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Set the name of the weight files to this current time stamp, even
    # if loaded from another timestamp.
    def current_weights_path(suffix):
        file_name = "{}_{}{}".format(self.model_name, suffix, self.model_ext)
        return os.path.join(self.model_dir, file_name)

    self.actor_filename_local = current_weights_path("actor_local")
    self.critic_filename_local = current_weights_path("critic_local")
    self.actor_filename_target = current_weights_path("actor_target")
    self.critic_filename_target = current_weights_path("critic_target")

    if self.save_weights_every:
        # Inside this guard the value is truthy, so the interval text is
        # always the one printed.
        print("Saving model weights",
              "every {} episodes".format(
                  self.save_weights_every))  # [debug]

    # Episode variables
    self.episode = 0
    self.reset_episode_vars()
def __init__(self, task):
    """Build actor/critic models restricted to component index 2.

    :param task: Task object exposing ``observation_space`` and
        ``action_space`` (``shape``, ``low``, ``high``).
    """
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    obs_space = self.task.observation_space
    act_space = self.task.action_space

    # Constrain state and action spaces
    self.state_size = 1  # position only
    self.state_range = obs_space.high[2] - obs_space.low[2]
    self.action_size = 1  # force only
    self.action_range = act_space.high[2] - act_space.low[2]
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    critic_weights = self.critic_local.model.get_weights()
    self.critic_target.model.set_weights(critic_weights)
    actor_weights = self.actor_local.model.get_weights()
    self.actor_target.model.set_weights(actor_weights)

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    print('Replay Buffer initialized')

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Episode stats: path to CSV file
    out_dir = util.get_param('out')
    csv_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, csv_name)
    self.episode_num = 1
def __init__(self, task):
    """Set up stats logging, weight persistence, networks and replay memory.

    Args:
        task: Environment description; it should contain
            ``observation_space`` and ``action_space``.
    """
    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Load/save parameters
    self.load_weights = True  # try to load weights from saved models
    self.save_weights_every = 1  # save every n episodes, None to disable
    # A separate subdirectory per task / architecture can be used here.
    self.model_dir = util.get_param('out')
    self.model_name = "land"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir,
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir,
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Task (environment) information
    self.task = task
    self.state_size = 1
    self.state_range = (self.task.observation_space.high
                        - self.task.observation_space.low)
    self.action_size = 1
    self.action_range = (self.task.action_space.high
                         - self.task.action_space.low)

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.episode = 0
    self.reset_episode_vars()

    # Actor (Policy) Model; bounds are the 2:3 slice of the action space.
    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and training continues with fresh weights.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters
def __init__(self, task):
    """Set up hyperparameters, weight persistence, networks and noise.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    print('start DDPG')
    self.task = task
    self.state_size = 1
    self.action_size = 1
    self.space_low = self.task.observation_space.low[2:3]

    # Episode stats go to a timestamped CSV in the 'out' directory.
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save

    # Episode variables
    self.reset_episode_vars()

    # Hyperparameters
    self.actor_learning_rate = 0.0001
    self.tau = 0.99
    self.mini_batch_size = 64
    self.buffer_size = 100000
    self.critic_learning_rate = 0.001
    self.gamma = 0.88
    self.episode = 0

    # Load/save parameters
    self.load_weights = False  # try to load weights from saved models
    self.save_weights_every = 50  # save every n episodes, None to disable
    # A separate subdirectory per task / architecture can be used here.
    self.model_dir = util.get_param('out')
    self.model_name = "my-model4"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir,
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir,
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    self.memory = ReplayBuffer(self.buffer_size)

    # Actor (Policy) Model; bounds are the 2:3 slice of the action space.
    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Best-effort load of previously saved weights; failures are reported
    # and the freshly constructed weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    self.actor_noise = OrnsteinUhlenbeckActionNoise(
        mu=np.zeros(self.action_size))
def __init__(self, task):
    """Set up weight persistence, networks, noise, memory and stats path.

    Args:
        task: Environment description; it should contain
            ``observation_space`` and ``action_space``.
    """
    # Task (environment) information
    self.task = task

    # Load/save parameters
    self.load_weights = True  # try to load weights from saved models
    self.save_weights_every = 5  # save every n episodes, None to disable
    # A separate subdirectory per task / architecture can be used here.
    self.model_dir = util.get_param('out')
    self.model_name = "ddpg_takeoff"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir,
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir,
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Constrain state and action spaces to the component at index 2.
    self.state_size = 1  # position only
    self.state_range = (self.task.observation_space.high[2]
                        - self.task.observation_space.low[2])
    self.action_size = 1  # force only
    self.action_range = (self.task.action_space.high[2]
                         - self.task.action_space.low[2])
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    print('Replay Buffer initialized')

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01  # for soft update of target parameters

    # Episode variables
    self.episode_num = 0
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "ddpg_takeoff_stats_{}.csv".format(
            util.get_timestamp()))  # path to CSV file
def __init__(self, task):
    """Set up weight file names, networks, replay memory, noise and stats.

    Args:
        task: Environment description; it should contain
            ``observation_space`` and ``action_space``.
    """
    # Task (environment) information
    self.task = task
    self.load_weights = False

    # Sizes are overridden here instead of being read from the task spaces.
    self.state_size = 6
    self.action_size = 3

    # Weights saver
    self.model_ext = ".h5"
    self.save_weights_every = 100

    # Loading uses a fixed, previously saved model name; saving uses a
    # fresh timestamp. The timestamp is taken once so the actor and critic
    # files of one run always share the same stamp.
    if self.load_weights:
        model_prefix = "model_hover_2018-02-20_18-11-35"
    elif self.save_weights_every:
        model_prefix = "model_{}".format(util.get_timestamp())
    else:
        model_prefix = None  # neither loading nor saving: no file names
    if model_prefix is not None:
        self.actor_filename = os.path.join(
            util.get_param('out'),
            "{}_actor{}".format(model_prefix, self.model_ext))
        self.critic_filename = os.path.join(
            util.get_param('out'),
            "{}_critic{}".format(model_prefix, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    print('State Size : {}, Action Size : {}'.format(
        self.state_size, self.action_size))

    # Action bounds: the first action_size entries of the action space.
    self.action_high = self.task.action_space.high[0:self.action_size]
    self.action_low = self.task.action_space.low[0:self.action_size]
    print('Action LOW : {}, Action HIGH : {}'.format(
        self.action_low, self.action_high))

    # Local and target networks; the trailing 0.001 is passed through to
    # the Actor/Critic constructors.
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high, 0.001)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high, 0.001)
    self.critic_local = Critic(self.state_size, self.action_size, 0.001)
    self.critic_target = Critic(self.state_size, self.action_size, 0.001)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Target models start with the local models' weights.
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Replay memory
    self.buffer_size = 10000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters
    self.noise = OUNoise(self.action_size)

    # Stats writer
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'avg_reward']  # columns to save
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    self.episode_num = 0
    self.reset_episode_vars()
def __init__(self, task):
    """Set up spaces, weight persistence, networks, memory and stats path.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    self.task = task

    # State bounds come from the first three observation components.
    # NOTE(review): state_size is 2 while the bounds below have three
    # entries - confirm which one is intended.
    self.state_size = 2
    self.state_low = self.task.observation_space.low[0:3]
    self.state_high = self.task.observation_space.high[0:3]
    self.state_range = self.state_high - self.state_low

    # Single action, bounded by index 2 of the action space.
    self.action_size = 1
    self.action_low = task.action_space.low[2]
    self.action_high = task.action_space.high[2]
    self.action_range = self.action_high - self.action_low
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Save weights
    self.load_weights = True
    self.save_weights_every = 10  # save every n episodes, None to disable
    # A separate subdirectory per task / architecture can be used here.
    self.model_dir = util.get_param('out')
    self.model_name = "task3-LANDING"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        # Weight files live in a per-task subdirectory of model_dir.
        self.actor_filename = os.path.join(
            self.model_dir, util.get_param('task'),
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, util.get_param('task'),
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Create actor and critic objects
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Target models get their weights from the local models.
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())

    # Buffer variables
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    self.noise = OUNoise(self.action_size)
    self.gamma = 0.99
    self.tau = 0.001

    # Per-episode bookkeeping
    self.last_state = None
    self.last_action = None
    self.reward_vector = []
    self.episode_count = 0
    self.step_count = 0
    self.episode = 0

    # Save episode rewards to a CSV file in the per-task subdirectory.
    self.total_reward = 0
    self.stats_filename = os.path.join(
        util.get_param('out'), util.get_param('task'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))
def __init__(self, task):
    """Set up spaces, weight persistence, networks, memory and stats path.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    self.task = task
    self.state_size = 3  # position only
    self.action_size = 3  # force only
    self.action_low = self.task.action_space.low[0:3]
    self.action_high = self.task.action_space.high[0:3]
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Load/save parameters
    self.load_weights = True  # try to load weights from saved models
    self.save_weights_every = 100  # None to disable
    self.model_dir = util.get_param('out')
    self.model_name = "my-model"
    self.model_ext = ".h5"
    self.episode = 0
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir,
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir,
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename:", self.actor_filename)
        print("Critic filename:", self.critic_filename)

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    self.rewards_list = []
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]
def __init__(self, task):
    """Set up spaces, weight persistence, networks, memory and stats path.

    Args:
        task: Environment description passed to the agent; its
            ``observation_space`` and ``action_space`` are read here.
    """
    self.task = task

    # Original author's note: the state now includes which phase is
    # active (takeoff=0 or landing=1).
    self.state_size = 3
    self.state_low = self.task.observation_space.low[0:3]  # min position x,y,z
    self.state_high = self.task.observation_space.high[0:3]  # max position x,y,z
    self.state_range = self.state_high - self.state_low  # position ranges

    # Action is constrained to the z direction (index 2).
    self.action_size = 1
    self.action_low = task.action_space.low[2]  # lowest z-force
    self.action_high = task.action_space.high[2]  # highest z-force
    self.action_range = self.action_high - self.action_low
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))  # [debug]

    # Save weights
    self.load_weights = True
    self.save_weights_every = 4
    self.model_dir = util.get_param('out')
    self.model_name = "MODEL_WEIGHTS"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        # Weight files live in a per-task subdirectory of model_dir.
        self.actor_filename = os.path.join(
            self.model_dir, util.get_param('task'),
            "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, util.get_param('task'),
            "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Create actor and critic objects
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Target models get their weights from the local models.
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())

    # Buffer variables
    self.buffer_size = 50000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    self.noise = OUNoise(self.action_size)
    self.gamma = 0.99
    self.tau = 0.01

    # Per-episode bookkeeping
    self.last_state = None
    self.last_action = None
    self.reward_vector = []
    self.episode_count = 0
    self.step_count = 0
    self.episode = 0  # episode number for given state
    self.episode_total = 0  # total number of episodes

    # Save episode rewards to a CSV file in the per-task subdirectory.
    # WARNING: stats_columns includes a leading 'task' column.
    self.total_reward = 0
    self.stats_filename = os.path.join(
        util.get_param('out'), util.get_param('task'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['task', 'episode', 'total_reward']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))
def __init__(self, task):
    """Set up networks, replay memory, noise, stats and weight file names.

    Args:
        task: Environment description; it should contain
            ``observation_space`` and ``action_space``.
    """
    # Task (environment) information
    self.task = task

    # Sizes are overridden here instead of being read from the task spaces.
    self.state_size = 3
    self.action_size = 3
    print('State Size : {}, Action Size : {}'.format(
        self.state_size, self.action_size))

    # Action bounds: the first action_size entries of the action space.
    self.action_high = self.task.action_space.high[0:self.action_size]
    self.action_low = self.task.action_space.low[0:self.action_size]
    print('Action LOW : {}, Action HIGH : {}'.format(
        self.action_low, self.action_high))

    # Local and target networks; the trailing numeric argument is passed
    # through to the Actor/Critic constructors.
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high, 0.0001)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high, 0.0001)
    self.critic_local = Critic(self.state_size, self.action_size, 0.001)
    self.critic_target = Critic(self.state_size, self.action_size, 0.001)

    # Target models start with the local models' weights.
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Replay memory
    self.buffer_size = 10000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters
    self.noise = OUNoise(self.action_size)

    # Stats writer
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Weights saver
    self.model_ext = ".h5"
    self.save_weights_every = 100
    if self.save_weights_every:
        # Take the timestamp once so the actor and critic files of one run
        # always carry the same stamp.
        weights_stamp = util.get_timestamp()
        self.actor_filename = os.path.join(
            util.get_param('out'),
            "{}_actor{}".format(weights_stamp, self.model_ext))
        self.critic_filename = os.path.join(
            util.get_param('out'),
            "{}_critic{}".format(weights_stamp, self.model_ext))
        print("Actor filename :", self.actor_filename)  # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    self.episode_num = 0
    self.reset_episode_vars()
def __init__(self, task):
    """Set up spaces, weight persistence, networks, memory and stats path.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    # Initialize environment variables
    self.task = task

    # The state uses observation components 2 and 9.
    self.state_size = 2
    self.state_low = np.array([self.task.observation_space.low[2],
                               self.task.observation_space.low[9]])
    self.state_high = np.array([self.task.observation_space.high[2],
                                self.task.observation_space.high[9]])
    self.state_range = self.state_high - self.state_low

    self.action_size = 1  # z-axis only
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]
    self.action_range = self.action_high - self.action_low

    # Load/save parameters
    self.load_weights = True  # try to load weights from saved models
    self.save_weights_every = 10  # save every n episodes, None to disable
    self.model_dir = util.get_param('out')
    self.model_name = "DDPG"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir,
            "{}actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir,
            "{}critic{}".format(self.model_name, self.model_ext))
        print("Actor filename:", self.actor_filename)  # [debug]
        print("Critic filename", self.critic_filename)  # [debug]

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available. Loading is best-effort:
    # a failure is reported and the fresh weights are kept.
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Deliberately not guarded by save_weights_every: with a guard the
    # "disabled" message could never be printed.
    print("Saving model weights",
          "every {} episodes".format(self.save_weights_every)
          if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99
    self.tau = 0.001

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Episode variables
    self.episode = 0
    self.reset_episode_vars()
def __init__(self, task):
    """Build the actor/critic networks, noise, replay buffer and stats path.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    # Task (environment) information
    self.task = task
    self.state_size = 3
    self.action_size = 3
    banner = "Original spaces:{}, {}\nConstrained spaces:{},{}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size)
    print(banner)

    # The full action-space bounds are handed to the actors.
    self.action_low = self.task.action_space.low
    self.action_high = self.task.action_space.high

    # Actor (policy) models: local and target take the same arguments.
    actor_args = (self.state_size, self.action_size,
                  self.action_low, self.action_high)
    self.actor_local = Actor(*actor_args)
    self.actor_target = Actor(*actor_args)

    # Critic (Q-value) models.
    critic_args = (self.state_size, self.action_size)
    self.critic_local = Critic(*critic_args)
    self.critic_target = Critic(*critic_args)

    # Target models start out with the local models' weights
    # (critic first, then actor).
    pairs = ((self.critic_target, self.critic_local),
             (self.actor_target, self.actor_local))
    for target_net, local_net in pairs:
        target_net.model.set_weights(local_net.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay buffer
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Policy parameters
    self.gamma = 0.99
    self.tau = 0.001

    # Episode stats are written to a timestamped CSV in the 'out' directory.
    out_dir = util.get_param('out')
    stats_name = "stats_{}.csv".format(util.get_timestamp())
    self.stats_filename = os.path.join(out_dir, stats_name)
    self.stats_columns = ["episode", "total_reward"]
    self.episode_num = 1

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task):
    """Set up state bounds, networks, memory, stats and checkpoint names.

    Args:
        task: Environment description; its ``observation_space`` and
            ``action_space`` are read here.
    """
    # Task (environment) information
    self.task = task
    self.state_size = 7
    self.action_size = 3  # force only

    # Lower bounds: first three observation lows followed by four zeros.
    self.state_low = np.concatenate([
        self.task.observation_space.low[:3],
        np.array([0.0, 0.0, 0.0, 0.0])
    ])
    # Upper bounds: first three observation highs, the span (high - low) of
    # those three components, and high[2] - 10.0.
    self.state_high = np.concatenate([
        self.task.observation_space.high[:3],
        self.task.observation_space.high[:3]
        - self.task.observation_space.low[:3],
        np.array([self.task.observation_space.high[2] - 10.0])
    ])
    self.state_range = self.state_high - self.state_low
    print("state low: {} state high: {} state range: {}".format(
        self.state_low, self.state_high, self.state_range))

    # Clip action bounds to the first action_size components.
    self.action_low = self.task.action_space.low[:self.action_size]
    self.action_high = self.task.action_space.high[:self.action_size]

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.actor_target.model.set_weights(
        self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(
        self.critic_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001  # for soft update of target parameters

    # Score tracker
    self.best_score = -np.inf

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        'hover/stats_{}.csv'.format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # columns to save
    print('Saving stats {} to {}'.format(self.stats_columns,
                                         self.stats_filename))  # [debug]

    # Save weights. The timestamp is taken once so the actor and critic
    # checkpoints of one run always carry the same stamp.
    self.save_weights_every = 100
    checkpoint_stamp = util.get_timestamp()
    self.actor_filename = os.path.join(
        util.get_param('out'),
        'hover/actor_checkpoints_{}.h5'.format(checkpoint_stamp))
    self.critic_filename = os.path.join(
        util.get_param('out'),
        'hover/critic_checkpoints_{}.h5'.format(checkpoint_stamp))
    print('Actor filename: ', self.actor_filename)
    print('Critic filename: ', self.critic_filename)

    self.episode_num = 1
    # NOTE(review): reset_episode_vars() was already called above; this
    # second call looks redundant - confirm before removing.
    self.reset_episode_vars()