def __init__(self, environment=None, params=None, bark_behavior=None, observer=None):
  BehaviorModel.__init__(self, params)
  self._params = params
  self._observer = observer
  self._environment = environment
  self._wrapped_env = tf_py_environment.TFPyEnvironment(
    PyBARKEnvironment(self._environment))
  self._agent = self.GetAgent(self._wrapped_env, params)
  # the checkpoint tracks the global step counter and the agent's variables
  self._ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   agent=self._agent)
  ckpt_path = self._params["ML"]["BehaviorTFAAgents"]["CheckpointPath", "", ""]
  self._ckpt_manager = self.GetCheckpointer(
    ckpt_path,
    self._params["ML"]["BehaviorTFAAgents"]["NumCheckpointsToKeep", "", 3])
  self._best_ckpt_manager = self.GetCheckpointer(
    ckpt_path + "best_checkpoint/", 1)
  self._logger = logging.getLogger()
  # NOTE: by default we do not want the action to be set externally
  # as this enables the agents to be plug and played in BARK.
  self._set_action_externally = False
  self._bark_behavior_model = bark_behavior or BehaviorContinuousML(params)

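# Illustration: GetCheckpointer above is presumably backed by
# tf.train.CheckpointManager, which prunes old checkpoints so that at most
# `max_to_keep` remain in a directory. A minimal sketch under that assumption
# (make_checkpointer is a hypothetical helper, not the project's method):
import tensorflow as tf

def make_checkpointer(checkpoint, directory, max_to_keep):
  # Saves `checkpoint` into `directory`, keeping only the newest
  # `max_to_keep` checkpoint files.
  return tf.train.CheckpointManager(
    checkpoint, directory=directory, max_to_keep=max_to_keep)

# Usage sketch, mirroring the two managers created above: a rolling set of
# regular checkpoints plus a single "best" checkpoint.
# ckpt_manager = make_checkpointer(ckpt, ckpt_path, max_to_keep=3)
# best_ckpt_manager = make_checkpointer(ckpt, ckpt_path + "best_checkpoint/", 1)
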
def __init__(self, environment=None, params=None):
  self._sac_params = params
  BehaviorTFAAgent.__init__(self, environment=environment, params=params)
  BehaviorModel.__init__(self, params)
  self._replay_buffer = self.GetReplayBuffer()
  self._dataset = self.GetDataset()
  self._collect_policy = self.GetCollectionPolicy()
  self._eval_policy = self.GetEvalPolicy()

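# Illustration: with tf_agents, GetReplayBuffer() and GetDataset() typically
# amount to the pattern below. This is a plausible sketch based on the public
# tf_agents API, not the project's exact implementation; buffer and batch
# sizes are placeholder assumptions.
from tf_agents.replay_buffers import tf_uniform_replay_buffer

def make_replay_buffer_and_dataset(agent, wrapped_env,
                                   buffer_size=10000, batch_size=64):
  replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agent.collect_data_spec,
    batch_size=wrapped_env.batch_size,
    max_length=buffer_size)
  # num_steps=2 yields (t, t+1) transition pairs, as required for SAC updates.
  dataset = replay_buffer.as_dataset(
    num_parallel_calls=3,
    sample_batch_size=batch_size,
    num_steps=2).prefetch(3)
  return replay_buffer, dataset
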
def __init__(self, environment=None, params=None, bark_behavior=None):
  BehaviorModel.__init__(self, params)
  self._params = params
  self._environment = environment
  self._wrapped_env = tf_py_environment.TFPyEnvironment(
    TFAWrapper(self._environment))
  self._agent = self.GetAgent(self._wrapped_env, params)
  # the checkpoint tracks the global step counter and the agent's variables
  self._ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                   agent=self._agent)
  self._ckpt_manager = self.GetCheckpointer()
  self._logger = logging.getLogger()
  # NOTE: by default we do not want the action to be set externally
  # as this enables the agents to be plug and played in BARK.
  self._set_action_externally = False
  self._bark_behavior_model = bark_behavior or BehaviorContinuousML(params)

def Plan(self, dt, observed_world):
  # NOTE: if training is enabled the action is set externally
  if not self._set_action_externally:
    # NOTE: we need to store the observer differently
    observed_state = self._environment._observer.Observe(observed_world)
    self._action = self.Act(observed_state)
  # NOTE: BARK expects (m, 1) actions
  action = self._action
  if isinstance(self.action_space, BoundedContinuous):
    action = np.reshape(self._action, (-1, 1))
  # set action to be executed
  self._bark_behavior_model.ActionToBehavior(action)
  trajectory = self._bark_behavior_model.Plan(dt, observed_world)
  next_action = self._bark_behavior_model.GetLastAction()
  # NOTE: BARK requires models to have trajectories of the past
  BehaviorModel.SetLastTrajectory(self, trajectory)
  BehaviorModel.SetLastAction(self, next_action)
  return trajectory

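# Illustration of the (m, 1) reshape above: the policy returns a flat action
# vector, while BARK expects a column vector. A minimal numpy example with
# placeholder values:
import numpy as np

flat_action = np.array([0.5, -0.1])               # e.g. acceleration, steering rate
column_action = np.reshape(flat_action, (-1, 1))  # shape (2, 1), as BARK expects
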
def Plan(self, dt, observed_world):
  # NOTE: if training is enabled the action is set externally
  if not self._set_action_externally:
    observed_state = self.env._observer.Observe(observed_world)
    self._action = self.Act(observed_state)
  action = self._action
  # set action to be executed
  self._bark_behavior_model.ActionToBehavior(action)
  trajectory = self._bark_behavior_model.Plan(dt, observed_world)
  # NOTE: BARK requires models to have trajectories of the past
  BehaviorModel.SetLastTrajectory(self, trajectory)
  return trajectory

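# Illustration: the two Plan() variants above support two modes. During
# training, an external loop chooses the action and the model only executes
# it; in deployment, Plan() queries the learned policy via Act(). A rough
# usage sketch, poking the private attributes shown above purely for
# illustration (`policy_action` is a hypothetical value from the RL algorithm):
#
# behavior._set_action_externally = True          # training mode
# behavior._action = policy_action                # set by the external trainer
# trajectory = behavior.Plan(dt, observed_world)  # executes the given action
#
# behavior._set_action_externally = False         # plug-and-play mode in BARK
# trajectory = behavior.Plan(dt, observed_world)  # Plan() calls Act() itself
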
def __init__(self, params=None):
  BehaviorModel.__init__(self, params)
  self._dynamic_behavior_model = BehaviorDynamicModel(params)

def __init__(self, dynamic_model=None, params=None):
  # BehaviorDynamicModel.__init__(self, dynamic_model, params)
  BehaviorModel.__init__(self, params)
  self._dynamic_model = dynamic_model
  self._params = params

def __init__(self, env, test_env, params, bark_behavior=None):
  BehaviorModel.__init__(self, params)
  self._params = params
  self.env = env
  self.test_env = test_env
  self._bark_behavior_model = bark_behavior or BehaviorDiscreteMacroActionsML(
    params)
  self.device = torch.device(
    "cuda" if self._params["ML"]["BaseAgent"]["Cuda", "", True]
    and torch.cuda.is_available() else "cpu")
  self.online_net = None
  self.target_net = None

  self.steps = 0
  self.learning_steps = 0
  self.episodes = 0
  self.best_eval_score = -np.inf
  self.num_actions = self.env.action_space.n
  self.num_steps = self._params["ML"]["BaseAgent"]["NumSteps", "", 5000000]
  self.batch_size = self._params["ML"]["BaseAgent"]["BatchSize", "", 32]
  self.double_q_learning = self._params["ML"]["BaseAgent"][
    "Double_q_learning", "", False]
  self.dueling_net = self._params["ML"]["BaseAgent"]["DuelingNet", "", False]
  self.noisy_net = self._params["ML"]["BaseAgent"]["NoisyNet", "", False]
  self.use_per = self._params["ML"]["BaseAgent"]["Use_per", "", False]
  self.reward_log_interval = self._params["ML"]["BaseAgent"][
    "RewardLogInterval", "", 5]
  self.summary_log_interval = self._params["ML"]["BaseAgent"][
    "SummaryLogInterval", "", 100]
  self.eval_interval = self._params["ML"]["BaseAgent"]["EvalInterval", "", 25000]
  self.num_eval_steps = self._params["ML"]["BaseAgent"]["NumEvalSteps", "", 12500]
  # discount applied over the multi-step return: gamma ** n
  self.gamma_n = self._params["ML"]["BaseAgent"]["Gamma", "", 0.99] ** \
    self._params["ML"]["BaseAgent"]["Multi_step", "", 1]
  self.start_steps = self._params["ML"]["BaseAgent"]["StartSteps", "", 5000]
  self.epsilon_train = LinearAnneaer(
    1.0,
    self._params["ML"]["BaseAgent"]["EpsilonTrain", "", 0.01],
    self._params["ML"]["BaseAgent"]["EpsilonDecaySteps", "", 25000])
  self.epsilon_eval = self._params["ML"]["BaseAgent"]["EpsilonEval", "", 0.001]
  self.update_interval = self._params["ML"]["BaseAgent"]["Update_interval", "", 4]
  self.target_update_interval = self._params["ML"]["BaseAgent"][
    "TargetUpdateInterval", "", 5000]
  self.max_episode_steps = self._params["ML"]["BaseAgent"][
    "MaxEpisodeSteps", "", 10000]
  self.grad_cliping = self._params["ML"]["BaseAgent"]["GradCliping", "", 5.0]
  self.summary_dir = self._params["ML"]["BaseAgent"]["SummaryPath", "", ""]
  self.model_dir = self._params["ML"]["BaseAgent"]["CheckpointPath", "", ""]

  if self.model_dir and not os.path.exists(self.model_dir):
    os.makedirs(self.model_dir)
  if self.summary_dir and not os.path.exists(self.summary_dir):
    os.makedirs(self.summary_dir)

  self.writer = SummaryWriter(log_dir=self.summary_dir)
  self.train_return = RunningMeanStats(self.summary_log_interval)

  # NOTE: by default we do not want the action to be set externally
  # as this enables the agents to be plug and played in BARK.
  self._set_action_externally = False

  # Replay memory which is memory-efficient to store stacked frames.
  if self.use_per:
    beta_steps = (self.num_steps - self.start_steps) / self.update_interval
    self.memory = LazyPrioritizedMultiStepMemory(
      self._params["ML"]["BaseAgent"]["MemorySize", "", 10**6],
      self.env.observation_space.shape,
      self.device,
      self._params["ML"]["BaseAgent"]["Gamma", "", 0.99],
      self._params["ML"]["BaseAgent"]["Multi_step", "", 1],
      beta_steps=beta_steps)
  else:
    self.memory = LazyMultiStepMemory(
      self._params["ML"]["BaseAgent"]["MemorySize", "", 10**6],
      self.env.observation_space.shape,
      self.device,
      self._params["ML"]["BaseAgent"]["Gamma", "", 0.99],
      self._params["ML"]["BaseAgent"]["Multi_step", "", 1])

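# Illustration: LinearAnneaer(1.0, EpsilonTrain, EpsilonDecaySteps) presumably
# anneals the exploration rate linearly from 1.0 down to EpsilonTrain over
# EpsilonDecaySteps environment steps. A self-contained sketch of such a
# schedule (an assumption about the behavior, not the upstream implementation):
class LinearSchedule:
  def __init__(self, start_value, end_value, num_steps):
    self._start = start_value
    self._end = end_value
    self._num_steps = num_steps
    self._step = 0

  def step(self):
    # Advance the schedule by one environment step, saturating at num_steps.
    self._step = min(self._step + 1, self._num_steps)

  def get(self):
    # Linearly interpolate between the start and end values.
    fraction = self._step / self._num_steps
    return self._start + fraction * (self._end - self._start)

# e.g. LinearSchedule(1.0, 0.01, 25000) returns 1.0 before any step() calls
# and 0.01 once 25000 steps have elapsed.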