def __init__(
    self,
    brain: BrainParameters,
    trainer_parameters: dict,
    training: bool,
    run_id: int,
    reward_buff_cap: int = 1,
):
    """
    Responsible for collecting experiences and training a neural network model.
    :BrainParameters brain: Brain to be trained.
    :dict trainer_parameters: The parameters for the trainer (dictionary).
        Must contain a "summary_path" entry.
    :bool training: Whether the trainer is set for training.
    :int run_id: The identifier of the current run
    :int reward_buff_cap: Maximum number of entries kept in the internal
        reward buffer (used as the deque's maxlen).
    """
    self.param_keys: List[str] = []
    self.brain_name = brain.brain_name
    self.run_id = run_id
    self.trainer_parameters = trainer_parameters
    self.summary_path = trainer_parameters["summary_path"]
    # Create the summary directory in one step instead of checking for its
    # existence first: a separate exists() check can race with another
    # process creating the same directory and make makedirs() raise.
    os.makedirs(self.summary_path, exist_ok=True)
    self.cumulative_returns_since_policy_update: List[float] = []
    self.is_training = training
    # Unknown stat names start out as an empty list on first access.
    self.stats: Dict[str, List] = defaultdict(list)
    # The metrics object is given a path next to the summary directory,
    # formed by appending ".csv" to the summary path.
    self.trainer_metrics = TrainerMetrics(
        path=self.summary_path + ".csv", brain_name=self.brain_name
    )
    self.summary_writer = tf.summary.FileWriter(self.summary_path)
    # Bounded buffer: once full, appending drops the oldest entry.
    self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
    # No policy is created here.
    self.policy: Policy = None
def test_experience_collection_timer(self):
    """
    Start and immediately stop the experience-collection timer, then check
    that the recorded delta for the last collection is zero.
    """
    # NOTE(review): a zero delta presumably relies on the clock being
    # patched outside this function (nothing visible here) - confirm.
    metrics = TrainerMetrics(path='fake', brain_name='fake')

    metrics.start_experience_collection_timer()
    metrics.end_experience_collection_timer()

    elapsed = metrics.delta_last_experience_collection
    assert elapsed == 0
def test_policy_update_timer(self):
    """
    Run one experience-collection cycle followed by one policy-update
    cycle, then check the first row recorded by the metrics object.
    """
    brain_name = 'fake'
    buffer_length = 350
    mean_return = 0.3
    metrics = TrainerMetrics(path='fake', brain_name=brain_name)

    # Experience collection must be timed before the policy update.
    metrics.start_experience_collection_timer()
    metrics.end_experience_collection_timer()

    metrics.start_policy_update_timer(
        number_experiences=buffer_length,
        mean_return=mean_return,
    )
    metrics.end_policy_update()

    # Expected row: brain name, three zero values, the buffer length and
    # the mean return as a three-decimal string.
    expected_row = [brain_name, 0, 0, 0, buffer_length, '0.300']
    assert metrics.rows[0] == expected_row
def __init__(self, brain, trainer_parameters, training, run_id):
    """
    Responsible for collecting experiences and training a neural network model.
    :BrainParameters brain: Brain to be trained.
    :dict trainer_parameters: The parameters for the trainer (dictionary).
        Must contain a 'summary_path' entry.
    :bool training: Whether the trainer is set for training.
    :int run_id: The identifier of the current run
    """
    self.param_keys = []
    self.brain_name = brain.brain_name
    self.run_id = run_id
    self.trainer_parameters = trainer_parameters
    self.summary_path = trainer_parameters['summary_path']
    # Create the summary directory in one step instead of checking for its
    # existence first: a separate exists() check can race with another
    # process creating the same directory and make makedirs() raise.
    os.makedirs(self.summary_path, exist_ok=True)
    self.cumulative_returns_since_policy_update = []
    self.is_training = training
    self.stats = {}
    # The metrics object is given a path next to the summary directory,
    # formed by appending '.csv' to the summary path.
    self.trainer_metrics = TrainerMetrics(
        path=self.summary_path + '.csv', brain_name=self.brain_name
    )
    self.summary_writer = tf.summary.FileWriter(self.summary_path)
    # No policy is created here.
    self.policy = None