def _initialize(self):
    """Create the demo-seeded replay memories needed for training.

    Nothing is created when ``self.args.test`` is truthy. Otherwise the
    demonstrations returned by ``self._load_demos()`` are handed to the
    buffers through their ``demo`` argument:

    * ``self.memory_n`` -- built only when ``self.use_n_step`` is truthy,
      from the n-step demonstrations produced by
      ``common_utils.get_n_step_info_from_demo``.
    * ``self.beta`` -- taken from ``hyper_params["PER_BETA"]``.
    * ``self.memory`` -- the prioritized replay buffer.
    """
    # Test runs do not build any replay memory.
    if self.args.test:
        return

    # load demo replay memory
    demos = self._load_demos()
    hp = self.hyper_params

    if self.use_n_step:
        # The helper returns a replacement for ``demos`` together with the
        # n-step demonstrations that go into the n-step buffer.
        demos, n_step_demos = common_utils.get_n_step_info_from_demo(
            demos, hp["N_STEP"], hp["GAMMA"]
        )
        self.memory_n = ReplayBuffer(
            buffer_size=hp["BUFFER_SIZE"],
            n_step=hp["N_STEP"],
            gamma=hp["GAMMA"],
            demo=n_step_demos,
        )

    # replay memory
    self.beta = hp["PER_BETA"]
    buffer_size = hp["BUFFER_SIZE"]
    batch_size = hp["BATCH_SIZE"]
    self.memory = PrioritizedReplayBuffer(
        buffer_size,
        batch_size,
        demo=demos,
        alpha=hp["PER_ALPHA"],
        epsilon_d=hp["PER_EPS_DEMO"],
    )
def _initialize(self):
    """Set the n-step flag and, when training, build the replay memories.

    ``self.use_n_step`` is always set: it is True when
    ``hyper_params["N_STEP"]`` is greater than 1.

    Nothing else happens when ``self.args.test`` is truthy. Otherwise the
    demonstrations are unpickled from ``self.args.demo_path`` and handed to
    the buffers through their ``demo`` argument:

    * ``self.memory_n`` -- built only when ``self.use_n_step`` is True,
      from the n-step demonstrations produced by
      ``common_utils.get_n_step_info_from_demo``.
    * ``self.beta`` -- taken from ``hyper_params["PER_BETA"]``.
    * ``self.memory`` -- the prioritized replay buffer.
    """
    hp = self.hyper_params
    self.use_n_step = hp["N_STEP"] > 1

    # Test runs do not build any replay memory.
    if self.args.test:
        return

    # load demo replay memory
    # NOTE(review): pickle.load can execute arbitrary code while unpickling,
    # so demo_path must point at a trusted file.
    with open(self.args.demo_path, "rb") as demo_file:
        demos = pickle.load(demo_file)

    if self.use_n_step:
        # The helper returns a replacement for ``demos`` together with the
        # n-step demonstrations that go into the n-step buffer.
        demos, n_step_demos = common_utils.get_n_step_info_from_demo(
            demos, hp["N_STEP"], hp["GAMMA"]
        )

        # replay memory for multi-steps
        self.memory_n = NStepTransitionBuffer(
            buffer_size=hp["BUFFER_SIZE"],
            n_step=hp["N_STEP"],
            gamma=hp["GAMMA"],
            demo=n_step_demos,
        )

    # replay memory
    self.beta = hp["PER_BETA"]
    buffer_size = hp["BUFFER_SIZE"]
    batch_size = hp["BATCH_SIZE"]
    self.memory = PrioritizedReplayBufferfD(
        buffer_size,
        batch_size,
        demo=demos,
        alpha=hp["PER_ALPHA"],
        epsilon_d=hp["PER_EPS_DEMO"],
    )