예제 #1
0
파일: trainer.py 프로젝트: znova/ml-agents
 def __init__(
     self,
     brain: BrainParameters,
     trainer_parameters: dict,
     training: bool,
     run_id: int,
     reward_buff_cap: int = 1,
 ):
     """
     Responsible for collecting experiences and training a neural network model.

     Ensures the summary directory exists, then sets up the bookkeeping
     containers, the CSV metrics tracker and the TensorFlow summary writer.

     :BrainParameters brain: Brain to be trained (only ``brain_name`` is read here).
     :dict trainer_parameters: The parameters for the trainer (dictionary).
         Must contain the key "summary_path".
     :bool training: Whether the trainer is set for training.
     :int run_id: The identifier of the current run
     :int reward_buff_cap: Maximum number of entries kept in the reward
         buffer; it is passed as ``maxlen`` to the underlying deque.
     :raises KeyError: If "summary_path" is missing from trainer_parameters.
     """
     self.param_keys: List[str] = []
     self.brain_name = brain.brain_name
     self.run_id = run_id
     self.trainer_parameters = trainer_parameters
     self.summary_path = trainer_parameters["summary_path"]
     # exist_ok=True avoids the check-then-create race of
     # `if not os.path.exists(...): os.makedirs(...)` when several trainers
     # (or processes) are started with the same summary path.
     os.makedirs(self.summary_path, exist_ok=True)
     self.cumulative_returns_since_policy_update: List[float] = []
     self.is_training = training
     self.stats: Dict[str, List] = defaultdict(list)
     # The metrics CSV is written next to the summary directory, not inside it.
     self.trainer_metrics = TrainerMetrics(path=self.summary_path + ".csv",
                                           brain_name=self.brain_name)
     self.summary_writer = tf.summary.FileWriter(self.summary_path)
     self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
     # No policy is attached at construction time.
     self.policy: Policy = None
예제 #2
0
 def test_experience_collection_timer(self):
     """Starting and immediately ending the collection timer yields a zero delta."""
     placeholder = 'fake'
     metrics = TrainerMetrics(path=placeholder, brain_name=placeholder)

     metrics.start_experience_collection_timer()
     metrics.end_experience_collection_timer()

     assert metrics.delta_last_experience_collection == 0
예제 #3
0
 def test_policy_update_timer(self):
     """Ending a policy update appends the expected row to the metrics rows."""
     brain_name = 'fake'
     metrics = TrainerMetrics(path='fake', brain_name=brain_name)

     # One experience-collection cycle precedes the policy update.
     metrics.start_experience_collection_timer()
     metrics.end_experience_collection_timer()

     metrics.start_policy_update_timer(number_experiences=350, mean_return=0.3)
     metrics.end_policy_update()

     expected_row = [brain_name, 0, 0, 0, 350, '0.300']
     assert metrics.rows[0] == expected_row
예제 #4
0
 def __init__(self, brain, trainer_parameters, training, run_id):
     """
     Responsible for collecting experiences and training a neural network model.

     Ensures the summary directory exists, then sets up the bookkeeping
     containers, the CSV metrics tracker and the TensorFlow summary writer.

     :BrainParameters brain: Brain to be trained (only ``brain_name`` is read here).
     :dict trainer_parameters: The parameters for the trainer (dictionary).
         Must contain the key 'summary_path'.
     :bool training: Whether the trainer is set for training.
     :int run_id: The identifier of the current run
     :raises KeyError: If 'summary_path' is missing from trainer_parameters.
     """
     self.param_keys = []
     self.brain_name = brain.brain_name
     self.run_id = run_id
     self.trainer_parameters = trainer_parameters
     self.summary_path = trainer_parameters['summary_path']
     # exist_ok=True avoids the check-then-create race of
     # `if not os.path.exists(...): os.makedirs(...)` when several trainers
     # (or processes) are started with the same summary path.
     os.makedirs(self.summary_path, exist_ok=True)
     self.cumulative_returns_since_policy_update = []
     self.is_training = training
     self.stats = {}
     # The metrics CSV is written next to the summary directory, not inside it.
     self.trainer_metrics = TrainerMetrics(path=self.summary_path + '.csv',
                                           brain_name=self.brain_name)
     self.summary_writer = tf.summary.FileWriter(self.summary_path)
     # No policy is attached at construction time.
     self.policy = None