Example #1
 def __init__(self,
              environment=None,
              params=None,
              bark_behavior=None,
              observer=None):
     BehaviorModel.__init__(self, params)
     self._params = params
     self._observer = observer
     self._environment = environment
     self._wrapped_env = tf_py_environment.TFPyEnvironment(
         PyBARKEnvironment(self._environment))
     self._agent = self.GetAgent(self._wrapped_env, params)
     self._ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                      agent=self._agent)
     ckpt_path = self._params["ML"]["BehaviorTFAAgents"]["CheckpointPath",
                                                         "", ""]
     self._ckpt_manager = self.GetCheckpointer(
         ckpt_path,
         self._params["ML"]["BehaviorTFAAgents"]["NumCheckpointsToKeep", "",
                                                 3])
     self._best_ckpt_manager = self.GetCheckpointer(
         ckpt_path + "best_checkpoint/", 1)
     self._logger = logging.getLogger()
     # NOTE: by default we do not want the action to be set externally
     #       as this enables the agents to be plug and played in BARK.
     self._set_action_externally = False
     self._bark_behavior_model = bark_behavior or BehaviorContinuousML(
         params)
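The checkpoint handling above reduces to a tf.train.Checkpoint that tracks the training step and the agent, with the GetCheckpointer helper (not shown) presumably returning tf.train.CheckpointManager instances for the rolling and the best-model checkpoints. A minimal, self-contained sketch of that pattern in plain TensorFlow; a Keras layer stands in for the TF-Agents agent, and the paths and keep counts are illustrative:

import tensorflow as tf

# Trackables: a step counter and a stand-in for the TF-Agents agent.
step = tf.Variable(0, dtype=tf.int64)
net = tf.keras.layers.Dense(4)
ckpt = tf.train.Checkpoint(step=step, agent=net)

# Rolling checkpoints plus a separate manager for the best model so far,
# mirroring the ckpt_path + "best_checkpoint/" convention used above.
ckpt_manager = tf.train.CheckpointManager(
    ckpt, "/tmp/agent_ckpts/", max_to_keep=3)
best_ckpt_manager = tf.train.CheckpointManager(
    ckpt, "/tmp/agent_ckpts/best_checkpoint/", max_to_keep=1)

# Resume if a checkpoint exists, otherwise start from scratch.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)

ckpt.step.assign_add(1)
ckpt_manager.save()       # keeps at most the 3 most recent checkpoints
best_ckpt_manager.save()  # keeps a single checkpoint in its own folder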
Example #2
 def __init__(self, environment=None, params=None):
     self._sac_params = params
     BehaviorTFAAgent.__init__(self, environment=environment, params=params)
     BehaviorModel.__init__(self, params)
     self._replay_buffer = self.GetReplayBuffer()
     self._dataset = self.GetDataset()
     self._collect_policy = self.GetCollectionPolicy()
     self._eval_policy = self.GetEvalPolicy()
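The SAC constructor above only wires together objects produced by small Get* factory methods (replay buffer, dataset, collect and eval policies), which keeps each piece overridable in subclasses and easy to stub out in tests. A plain-Python sketch of that structure; the factory bodies here are placeholders, not the actual BARK-ML implementations:

class AgentSkeleton:
    """Hypothetical skeleton mirroring the constructor above: __init__ only
    delegates to overridable factory methods."""

    def __init__(self, environment=None, params=None):
        self._environment = environment
        self._params = params
        self._replay_buffer = self.GetReplayBuffer()
        self._dataset = self.GetDataset()
        self._collect_policy = self.GetCollectionPolicy()
        self._eval_policy = self.GetEvalPolicy()

    # Placeholder factories; a concrete agent would return the TF-Agents
    # replay buffer, its tf.data dataset and the collect/eval policies here.
    def GetReplayBuffer(self):
        return []

    def GetDataset(self):
        return iter(self._replay_buffer)

    def GetCollectionPolicy(self):
        return lambda observation: None

    def GetEvalPolicy(self):
        return lambda observation: None


agent = AgentSkeleton(environment=None, params={})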
Example #3
 def __init__(self, environment=None, params=None, bark_behavior=None):
     BehaviorModel.__init__(self, params)
     self._params = params
     self._environment = environment
     self._wrapped_env = tf_py_environment.TFPyEnvironment(
         TFAWrapper(self._environment))
     self._agent = self.GetAgent(self._wrapped_env, params)
     self._ckpt = tf.train.Checkpoint(step=tf.Variable(0, dtype=tf.int64),
                                      agent=self._agent)
     self._ckpt_manager = self.GetCheckpointer()
     self._logger = logging.getLogger()
     # NOTE: by default we do not want the action to be set externally
     #       as this enables the agents to be plug and played in BARK.
     self._set_action_externally = False
     self._bark_behavior_model = bark_behavior or BehaviorContinuousML(
         params)
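The environment handling in these constructors is standard TF-Agents usage: a Python environment is wrapped in tf_py_environment.TFPyEnvironment so the agent works with batched tensors. A minimal sketch with a Gym task standing in for the BARK environment and its PyBARKEnvironment/TFAWrapper adapters (assumed to expose the usual PyEnvironment interface):

from tf_agents.environments import suite_gym, tf_py_environment

# A standard Gym environment replaces the wrapped BARK environment here.
py_env = suite_gym.load("CartPole-v0")
tf_env = tf_py_environment.TFPyEnvironment(py_env)

print(tf_env.time_step_spec())
print(tf_env.action_spec())
time_step = tf_env.reset()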
Example #4
 def __init__(self, params=None):
     BehaviorModel.__init__(self, params)
     self._dynamic_behavior_model = BehaviorDynamicModel(params)
Example #5
 def __init__(self, dynamic_model=None, params=None):
     # BehaviorDynamicModel.__init__(self, dynamic_model, params)
     BehaviorModel.__init__(self, params)
     self._dynamic_model = dynamic_model
     self._params = params
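Examples #4 and #5 use the same idea in miniature: the behavior wrapper stores a dynamic model and forwards work to it rather than inheriting from it. A small illustrative sketch of that delegation; the forwarded method name is an assumption, not the actual BARK interface:

class DelegatingBehavior:
    """Illustrative only: composes a dynamic model and forwards calls to it."""

    def __init__(self, dynamic_model=None, params=None):
        self._dynamic_model = dynamic_model
        self._params = params

    def ActionToBehavior(self, action):
        # Hypothetical forwarding hook; the real method set depends on BARK.
        if self._dynamic_model is not None:
            return self._dynamic_model.ActionToBehavior(action)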
Example #6
    def __init__(self, env, test_env, params, bark_behavior=None):
        BehaviorModel.__init__(self, params)
        self._params = params
        self.env = env
        self.test_env = test_env
        self._bark_behavior_model = bark_behavior or BehaviorDiscreteMacroActionsML(
            params)

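        # Run on the GPU when CUDA is enabled in the parameters and available.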
        self.device = torch.device("cuda" if self._params["ML"]["BaseAgent"][
            "Cuda", "", True] and torch.cuda.is_available() else "cpu")

        self.online_net = None
        self.target_net = None

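        # Training counters, action-space size and core training hyperparameters.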
        self.steps = 0
        self.learning_steps = 0
        self.episodes = 0
        self.best_eval_score = -np.inf
        self.num_actions = self.env.action_space.n
        self.num_steps = self._params["ML"]["BaseAgent"]["NumSteps", "",
                                                         5000000]
        self.batch_size = self._params["ML"]["BaseAgent"]["BatchSize", "", 32]

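        # Optional DQN extensions: double Q-learning, dueling heads, noisy nets, prioritized replay.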
        self.double_q_learning = self._params["ML"]["BaseAgent"][
            "Double_q_learning", "", False]
        self.dueling_net = self._params["ML"]["BaseAgent"]["DuelingNet", "",
                                                           False]
        self.noisy_net = self._params["ML"]["BaseAgent"]["NoisyNet", "", False]
        self.use_per = self._params["ML"]["BaseAgent"]["Use_per", "", False]

        self.reward_log_interval = self._params["ML"]["BaseAgent"][
            "RewardLogInterval", "", 5]
        self.summary_log_interval = self._params["ML"]["BaseAgent"][
            "SummaryLogInterval", "", 100]
        self.eval_interval = self._params["ML"]["BaseAgent"]["EvalInterval",
                                                             "", 25000]
        self.num_eval_steps = self._params["ML"]["BaseAgent"]["NumEvalSteps",
                                                              "", 12500]
        self.gamma_n = \
         self._params["ML"]["BaseAgent"]["Gamma", "", 0.99] ** \
         self._params["ML"]["BaseAgent"]["Multi_step", "", 1]

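        # Exploration schedule, update/target-sync intervals, episode length limit and gradient clipping.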
        self.start_steps = self._params["ML"]["BaseAgent"]["StartSteps", "",
                                                           5000]
        self.epsilon_train = LinearAnneaer(
            1.0, self._params["ML"]["BaseAgent"]["EpsilonTrain", "", 0.01],
            self._params["ML"]["BaseAgent"]["EpsilonDecaySteps", "", 25000])
        self.epsilon_eval = self._params["ML"]["BaseAgent"]["EpsilonEval", "",
                                                            0.001]
        self.update_interval = \
         self._params["ML"]["BaseAgent"]["Update_interval", "", 4]
        self.target_update_interval = self._params["ML"]["BaseAgent"][
            "TargetUpdateInterval", "", 5000]
        self.max_episode_steps = \
         self._params["ML"]["BaseAgent"]["MaxEpisodeSteps",  "", 10000]
        self.grad_cliping = self._params["ML"]["BaseAgent"]["GradCliping", "",
                                                            5.0]

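        # Summary (TensorBoard) and checkpoint output directories, created below if missing.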
        self.summary_dir = \
         self._params["ML"]["BaseAgent"]["SummaryPath", "", ""]
        self.model_dir = \
         self._params["ML"]["BaseAgent"]["CheckpointPath", "", ""]

        if not os.path.exists(self.model_dir) and self.model_dir:
            os.makedirs(self.model_dir)
        if not os.path.exists(self.summary_dir) and self.summary_dir:
            os.makedirs(self.summary_dir)

        self.writer = SummaryWriter(log_dir=self.summary_dir)
        self.train_return = RunningMeanStats(self.summary_log_interval)

        # NOTE: by default we do not want the action to be set externally
        #       as this enables the agents to be plug and played in BARK.
        self._set_action_externally = False

        # Replay memory which is memory-efficient to store stacked frames.
        if self.use_per:
            beta_steps = (self.num_steps - self.start_steps) / \
                   self.update_interval
            self.memory = LazyPrioritizedMultiStepMemory(
                self._params["ML"]["BaseAgent"]["MemorySize", "", 10**6],
                self.env.observation_space.shape,
                self.device,
                self._params["ML"]["BaseAgent"]["Gamma", "", 0.99],
                self._params["ML"]["BaseAgent"]["Multi_step", "", 1],
                beta_steps=beta_steps)
        else:
            self.memory = LazyMultiStepMemory(
                self._params["ML"]["BaseAgent"]["MemorySize", "", 10**6],
                self.env.observation_space.shape, self.device,
                self._params["ML"]["BaseAgent"]["Gamma", "", 0.99],
                self._params["ML"]["BaseAgent"]["Multi_step", "", 1])
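All of these constructors read hyperparameters through the same tuple-indexing idiom, e.g. self._params["ML"]["BaseAgent"]["NumSteps", "", 5000000], where the tuple carries the key, a description string and a default value. The stand-in class below shows how that indexing behaves; it is an assumption about the semantics of BARK's ParameterServer, not its actual implementation:

class ParamsSketch:
    """Stand-in for the parameter server: a string key descends into a child
    node, a (key, description, default) tuple returns the stored value or
    the default."""

    def __init__(self, store=None):
        self._store = store if store is not None else {}

    def __getitem__(self, key):
        if isinstance(key, tuple):
            name, _description, default = key
            return self._store.get(name, default)
        return ParamsSketch(self._store.setdefault(key, {}))


params = ParamsSketch({"ML": {"BaseAgent": {"BatchSize": 64}}})
print(params["ML"]["BaseAgent"]["BatchSize", "", 32])      # 64: stored value wins
print(params["ML"]["BaseAgent"]["NumSteps", "", 5000000])  # 5000000: falls back to the default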