def worker_process(remote: multiprocessing.connection.Connection, env_config, worker_id: int):
    """Initializes the environment and executes its interface.

    After the environment is created, commands are read from ``remote`` as
    ``(cmd, data)`` tuples and answered until a "close" command arrives or the
    communication fails:
        "step"  -- sends back the result of ``env.step(data)``
        "reset" -- sends back the result of ``env.reset(data)``
        "close" -- sends back the result of ``env.close()``, closes ``remote`` and stops

    Arguments:
        remote {multiprocessing.connection.Connection} -- Parent thread
        env_config {dict} -- The configuration data of the desired environment
        worker_id {int} -- Id for the environment's process. This is necessary for Unity ML-Agents environments, because these operate on different ports.
    """
    # Initialize and wrap the environment
    try:
        env = wrap_environment(env_config, worker_id)
    except KeyboardInterrupt:
        # Interrupted before an environment exists: there is nothing to serve.
        # Returning here avoids entering the command loop with an unbound env.
        remote.close()
        return

    # Communication interface of the environment thread
    while True:
        try:
            cmd, data = remote.recv()
            if cmd == "step":
                remote.send(env.step(data))
            elif cmd == "reset":
                remote.send(env.reset(data))
            elif cmd == "close":
                remote.send(env.close())
                remote.close()
                break
            else:
                raise NotImplementedError
        except (KeyboardInterrupt, Exception):
            # Deliberate best effort: any failure (interrupt, broken pipe,
            # unknown command, environment error) silently ends the worker.
            break
def main():
    """Evaluates every checkpoint found for the given path and pickles the results.

    Command line options are parsed with docopt. For each checkpoint returned by
    get_sorted_checkpoints, the loaded model is passed to the evaluator and the
    second element of its return value is collected. The collected results are
    reshaped to (checkpoints, seeds, n_workers) and dumped with pickle to the
    file given by --name.
    """
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --path=<path>              Specifies the tag of the tensorboard summaries [default: None].
        --name=<path>              Specifies the full path to save the output file [default: results.res].
    """
    options = docopt(_USAGE)
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    path = options["--path"]
    name = options["--name"]

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation space for further processing
    print("Step 1: Creating dummy environment of type " + configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    dummy_env.close()

    # Init evaluator
    print("Step 1: Environment Config")
    for k, v in configs["environment"].items():
        print("Step 1: " + str(k) + ": " + str(v))
    print("Step 2: Evaluation Config")
    for k, v in configs["evaluation"].items():
        print("Step 2: " + str(k) + ": " + str(v))
    print("Step 2: Init Evaluator")
    evaluator = Evaluator(configs["evaluation"], configs["environment"], worker_id, visual_observation_space, vector_observation_space)

    # Load checkpoint paths
    print("Step 3: Load Checkpoint Paths")
    checkpoints = get_sorted_checkpoints(path)
    print("Step 3: Number of Loaded Checkpoint Paths: " + str(len(checkpoints)))

    # Evaluate checkpoints
    print("Step 4: Start Evaluation . . .")
    print("Progress:")
    results = []
    try:
        for current_checkpoint, checkpoint in enumerate(checkpoints, start=1):
            _, res = evaluator.evaluate(torch.load(checkpoint), device)
            results.append(res)
            # Overwrite the same console line with the fraction of processed checkpoints
            prog = current_checkpoint / len(checkpoints)
            print(f"\r{prog:.2f}", end='', flush=True)
    finally:
        # Close the evaluator even if loading or evaluating a checkpoint fails
        evaluator.close()

    # Save results to file
    print("")
    print("Step 5: Save to File: " + name)
    results = np.asarray(results).reshape(len(checkpoints), len(configs["evaluation"]["seeds"]), configs["evaluation"]["n_workers"])
    # The context manager closes the file even if pickling raises
    with open(name, "wb") as outfile:
        pickle.dump(results, outfile)
def main():
    """Evaluates a single (untrained or loaded) model and prints the aggregated result.

    Command line options are parsed with docopt. Depending on --untrained, the
    model is either created from the config or loaded from the configured model
    path. The evaluator's raw episode results are processed by
    _process_episode_info and printed as mean/std of reward and episode length.
    """
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --run-id=<path>            Specifies the tag of the tensorboard summaries [default: default].
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    print("Step 1: Creating dummy environment of type " + configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    if isinstance(dummy_env.action_space, spaces.Discrete):
        action_space_shape = (dummy_env.action_space.n,)
    else:
        action_space_shape = tuple(dummy_env.action_space.nvec)
    dummy_env.close()

    # Build or load model
    if untrained:
        print("Step 2: Creating model")
        model = OTCModel(configs["model"], visual_observation_space,
                         vector_observation_space, action_space_shape,
                         configs["model"]["use_recurrent"],
                         configs["model"]["hidden_state_size"]).to(device)
    else:
        print("Step 2: Loading model from " + configs["model"]["model_path"])
        model = torch.load(configs["model"]["model_path"]).to(device)
    model.eval()

    # Initialize evaluator
    print("Step 3: Initialize evaluator")
    print("Step 3: Number of Workers: " + str(configs["evaluation"]["n_workers"]))
    print("Step 3: Seeds: " + str(configs["evaluation"]["seeds"]))
    print("Step 3: Number of episodes: " + str(len(configs["evaluation"]["seeds"]) * configs["evaluation"]["n_workers"]))
    evaluator = Evaluator(configs["evaluation"], configs["environment"], worker_id, visual_observation_space, vector_observation_space)

    try:
        # Evaluate
        print("Step 4: Run evaluation . . .")
        eval_duration, raw_episode_results = evaluator.evaluate(model, device)
        episode_result = _process_episode_info(raw_episode_results)

        # Print results
        print("RESULT: sec={:3} mean reward={:.2f} std={:.2f} mean length={:.1f} std={:.2f}".format(
            eval_duration, episode_result["reward_mean"], episode_result["reward_std"],
            episode_result["length_mean"], episode_result["length_std"]))
    finally:
        # Close the evaluator even if the evaluation or result processing fails
        print("Step 5: Closing evaluator . . .")
        evaluator.close()
def __init__(self, configs, worker_id, run_id = "default", low_mem_fix = False, out_path = "./"):
    """Initializes the trainer, the model, the buffer, the evaluator and launches training environments

    Arguments:
        configs {dict} -- The whole set of configurations (e.g. training and environment configs)
        worker_id {int} -- Specifies the offset for the port to communicate with the environment, which is needed for Unity ML-Agents environments

    Keyword Arguments:
        run_id {string} -- The run_id is used to tag the training runs (directory names to store summaries and checkpoints) (default: {"default"})
        low_mem_fix {bool} -- If true, the mini batch device is the cpu instead of the device that hosts the model. This is necessary for too small GPU memory capacities (default: {False})
        out_path {string} -- Prefix that is prepended verbatim to the "summaries", "checkpoints" and "logs" directories, hence it has to end with a path separator (default: {"./"})
    """
    # Handle Ctrl + C event, which aborts and shuts down the training process in a controlled manner
    signal(SIGINT, self._handler)

    # Create directories for storing checkpoints, logs and tensorboard summaries based on the current time and provided run_id
    # exist_ok avoids the check-then-create race when several trainers start at once
    os.makedirs(out_path + "summaries", exist_ok=True)
    os.makedirs(out_path + "checkpoints", exist_ok=True)
    os.makedirs(out_path + "logs/" + run_id, exist_ok=True)
    timestamp = time.strftime("/%Y%m%d-%H%M%S"+ "_" + str(worker_id) + "/")
    self.checkpoint_path = out_path + "checkpoints/" + run_id + timestamp
    # Intentionally without exist_ok: an already existing checkpoint directory raises
    os.makedirs(self.checkpoint_path)

    # Setup logger
    logging.basicConfig(level = logging.INFO, handlers=[])
    self.logger = logging.getLogger("train")
    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter("%(asctime)s: %(message)s", "%Y-%m-%d %H:%M:%S"))
    # timestamp ends with "/", which is stripped to form the log file name
    path = out_path + "logs/" + run_id + timestamp[:-1] + ".log"
    logfile = logging.FileHandler(path, mode="w")
    self.logger.addHandler(console)
    self.logger.addHandler(logfile)

    # Determine cuda availability
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Init members
    self.worker_id = worker_id
    self.run_id = run_id
    self.low_mem_fix = low_mem_fix
    if self.low_mem_fix:
        self.mini_batch_device = torch.device("cpu")
    else:
        self.mini_batch_device = self.device
    self.configs = configs
    self.resume_at = configs["trainer"]['resume_at']
    self.gamma = configs["trainer"]['gamma']
    self.lamda = configs["trainer"]['lamda']
    self.updates = configs["trainer"]['updates']
    self.epochs = configs["trainer"]['epochs']
    self.n_workers = configs["trainer"]['n_workers']
    self.worker_steps = configs["trainer"]['worker_steps']
    self.n_mini_batch = configs["trainer"]['n_mini_batch']
    # None if the model config has no "recurrence" entry
    self.recurrence = configs["model"].get("recurrence")
    self.lr_schedule = configs["trainer"]['learning_rate_schedule']
    self.beta_schedule = configs["trainer"]['beta_schedule']
    self.cr_schedule = configs["trainer"]['clip_range_schedule']

    self.batch_size = self.n_workers * self.worker_steps
    self.mini_batch_size = self.batch_size // self.n_mini_batch
    assert (self.batch_size % self.n_mini_batch == 0), "Batch Size divided by number of mini batches has a remainder."
    self.writer = SummaryWriter(out_path + "summaries/" + run_id + timestamp)
    self._write_hyperparameters(configs)

    self.checkpoint_interval = configs["model"]["checkpoint_interval"]

    # Start logging the training setup
    self.logger.info("Step 1: Provided config:")
    for key in configs:
        self.logger.info("Step 1: " + str(key) + ":")
        for k, v in configs[key].items():
            self.logger.info("Step 1: " + str(k) + ": " + str(v))

    self.logger.info("Step 2: Creating dummy environment")
    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    self.dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = self.dummy_env.visual_observation_space
    vector_observation_space = self.dummy_env.vector_observation_space
    if isinstance(self.dummy_env.action_space, spaces.Discrete):
        self.action_space_shape = (self.dummy_env.action_space.n,)
    else:
        self.action_space_shape = tuple(self.dummy_env.action_space.nvec)
    self.dummy_env.close()

    self.logger.info("Step 2: Visual Observation Space: " + str(visual_observation_space))
    self.logger.info("Step 2: Vector Observation Space: " + str(vector_observation_space))
    self.logger.info("Step 2: Action Space Shape: " + str(self.action_space_shape))
    # NOTE(review): action_names is read after the dummy environment was closed -- confirm this stays valid
    self.logger.info("Step 2: Action Names: " + str(self.dummy_env.action_names))

    # Prepare evaluator if configured
    self.eval = configs["evaluation"]["evaluate"]
    self.eval_interval = configs["evaluation"]["interval"]
    if self.eval:
        self.logger.info("Step 2b: Initializing evaluator")
        self.evaluator = Evaluator(configs, worker_id, visual_observation_space, vector_observation_space)

    # Instantiate experience/training data buffer
    self.buffer = Buffer(
        self.n_workers, self.worker_steps, self.n_mini_batch,
        visual_observation_space, vector_observation_space,
        self.action_space_shape, self.recurrence,
        self.device, self.mini_batch_device)

    # Init model
    self.logger.info("Step 3: Creating model")
    self.model = OTCModel(configs["model"], visual_observation_space,
                          vector_observation_space, self.action_space_shape,
                          self.recurrence).to(self.device)

    # Instantiate optimizer
    self.optimizer = optim.AdamW(self.model.parameters(), lr=self.lr_schedule["initial"])

    # Load checkpoint and apply data
    if configs["model"]["load_model"]:
        self.logger.info("Step 3: Loading model from " + configs["model"]["model_path"])
        checkpoint = load_checkpoint(configs["model"]["model_path"])
        self.model.load_state_dict(checkpoint["model_state_dict"])
        if self.recurrence is not None:
            self.model.set_mean_recurrent_cell_states(checkpoint["hxs"], checkpoint["cxs"])
        self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        # NOTE(review): resuming the update counter from the checkpoint is disabled:
        # self.resume_at = checkpoint["update"] + 1

    # Set model to train mode
    self.model.train()

    # Launch workers
    self.logger.info("Step 4: Launching training environments of type " + configs["environment"]["type"])
    # Training workers use ids offset by 200 from the trainer's worker_id
    self.workers = [Worker(configs["environment"], worker_id + 200 + w) for w in range(self.n_workers)]

    # Setup initial observations
    if visual_observation_space is not None:
        self.vis_obs = np.zeros((self.n_workers,) + visual_observation_space.shape, dtype=np.float32)
    else:
        self.vis_obs = None
    if vector_observation_space is not None:
        self.vec_obs = np.zeros((self.n_workers,) + vector_observation_space, dtype=np.float32)
    else:
        self.vec_obs = None

    # Setup initial recurrent cell
    if self.recurrence is not None:
        hxs, cxs = self.model.init_recurrent_cell_states(self.n_workers, self.mini_batch_device)
        if self.recurrence["layer_type"] == "gru":
            self.recurrent_cell = hxs
        elif self.recurrence["layer_type"] == "lstm":
            self.recurrent_cell = (hxs, cxs)
    else:
        self.recurrent_cell = None

    # Reset workers
    for worker in self.workers:
        worker.child.send(("reset", None))
    # Grab initial observations
    for i, worker in enumerate(self.workers):
        vis_obs, vec_obs = worker.child.recv()
        if self.vis_obs is not None:
            self.vis_obs[i] = vis_obs
        if self.vec_obs is not None:
            self.vec_obs[i] = vec_obs
def main():
    """Evaluates a single (untrained or loaded) model, optionally recording videos.

    Command line options are parsed with docopt. A video is only recorded if a
    command line argument containing "--video" was passed. The model is created
    from the config and, unless --untrained is set, its state is restored from
    the configured checkpoint. The evaluator's raw episode results are processed
    by _process_episode_info and logged as mean/std of reward and episode length.
    """
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --video=<path>             Specify a path for saving videos, if video recording is desired. The files' extension will be set automatically. [default: ./video].
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    video_path = options["--video"]

    # Determine whether to record a video. A video is only recorded if the video flag is used.
    # docopt always fills in the default path, so the raw arguments are inspected instead.
    record_video = any("--video" in arg for arg in sys.argv)
    if record_video:
        logger.info("Step 0: Video recording enabled. Video will be saved to " + video_path)

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    logger.info("Step 1: Creating dummy environment of type " + configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    if isinstance(dummy_env.action_space, spaces.Discrete):
        action_space_shape = (dummy_env.action_space.n, )
    else:
        action_space_shape = tuple(dummy_env.action_space.nvec)
    dummy_env.close()

    # Build or load model
    logger.info("Step 2: Creating model")
    model = OTCModel(
        configs["model"], visual_observation_space, vector_observation_space,
        action_space_shape,
        configs["model"].get("recurrence")).to(device)
    if not untrained:
        logger.info("Step 2: Loading model from " + configs["model"]["model_path"])
        checkpoint = load_checkpoint(configs["model"]["model_path"])
        model.load_state_dict(checkpoint["model_state_dict"])
        if "recurrence" in configs["model"]:
            model.set_mean_recurrent_cell_states(checkpoint["hxs"], checkpoint["cxs"])
    model.eval()

    # Initialize evaluator
    logger.info("Step 3: Initialize evaluator")
    logger.info("Step 3: Number of Workers: " + str(configs["evaluation"]["n_workers"]))
    logger.info("Step 3: Seeds: " + str(configs["evaluation"]["seeds"]))
    logger.info("Step 3: Number of episodes: " + str(
        len(configs["evaluation"]["seeds"]) * configs["evaluation"]["n_workers"]))
    evaluator = Evaluator(configs, worker_id, visual_observation_space,
                          vector_observation_space, video_path, record_video)

    try:
        # Evaluate
        logger.info("Step 4: Run evaluation . . .")
        eval_duration, raw_episode_results = evaluator.evaluate(model, device)
        episode_result = _process_episode_info(raw_episode_results)

        # Print results
        logger.info(
            "RESULT: sec={:3} mean reward={:.2f} std={:.2f} mean length={:.1f} std={:.2f}"
            .format(eval_duration, episode_result["reward_mean"],
                    episode_result["reward_std"],
                    episode_result["length_mean"],
                    episode_result["length_std"]))
    finally:
        # Close the evaluator even if the evaluation or result processing fails
        logger.info("Step 5: Closing evaluator . . .")
        evaluator.close()
def __init__(self, configs, worker_id, run_id="default", low_mem_fix=False):
    """Initializes the trainer, the model, the buffer, the evaluator and launches training environments

    Arguments:
        configs {dict} -- The whole set of configurations (e.g. training and environment configs)
        worker_id {int} -- Specifies the offset for the port to communicate with the environment, which is needed for Unity ML-Agents environments

    Keyword Arguments:
        run_id {string} -- The run_id is used to tag the training runs (directory names to store summaries and checkpoints) (default: {"default"})
        low_mem_fix {bool} -- If true, the mini batch device is the cpu instead of the device that hosts the model. This is necessary for too small GPU memory capacities (default: {False})
    """
    # Handle Ctrl + C event, which aborts and shuts down the training process in a controlled manner
    signal(SIGINT, self.handler)

    # Create directories for storing checkpoints, models and tensorboard summaries based on the current time and provided run_id
    # exist_ok avoids the check-then-create race when several trainers start at once
    os.makedirs("summaries", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)
    timestamp = time.strftime("/%Y%m%d-%H%M%S" + "_" + str(worker_id) + "/")
    self.checkpoint_path = "checkpoints/" + run_id + timestamp
    # Intentionally without exist_ok: an already existing checkpoint directory raises
    os.makedirs(self.checkpoint_path)

    # Determine cuda availability
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Init members
    self.worker_id = worker_id
    self.run_id = run_id
    self.low_mem_fix = low_mem_fix
    if self.low_mem_fix:
        self.mini_batch_device = torch.device("cpu")
    else:
        self.mini_batch_device = self.device
    self.resume_at = configs["trainer"]['resume_at']
    self.gamma = configs["trainer"]['gamma']
    self.lamda = configs["trainer"]['lamda']
    self.updates = configs["trainer"]['updates']
    self.epochs = configs["trainer"]['epochs']
    self.n_workers = configs["trainer"]['n_workers']
    self.worker_steps = configs["trainer"]['worker_steps']
    self.n_mini_batch = configs["trainer"]['n_mini_batch']
    self.use_recurrent = configs["model"]["use_recurrent"]
    self.hidden_state_size = configs["model"]["hidden_state_size"]
    self.lr_schedule = configs["trainer"]['learning_rate_schedule']
    self.beta_schedule = configs["trainer"]['beta_schedule']
    self.cr_schedule = configs["trainer"]['clip_range_schedule']

    self.batch_size = self.n_workers * self.worker_steps
    self.mini_batch_size = self.batch_size // self.n_mini_batch
    assert (
        self.batch_size % self.n_mini_batch == 0
    ), "Batch Size divided by number of mini batches has a remainder."
    self.writer = SummaryWriter("summaries/" + run_id + timestamp)
    self.write_hyperparameters(configs["trainer"])

    self.checkpoint_interval = configs["model"]["checkpoint_interval"]

    print("Step 1: Provided config:")
    for key in configs:
        print("Step 1: " + str(key) + ":")
        for k, v in configs[key].items():
            print("Step 1: " + str(k) + ": " + str(v))

    print("Step 2: Creating dummy environment")
    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    self.dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = self.dummy_env.visual_observation_space
    vector_observation_space = self.dummy_env.vector_observation_space
    if isinstance(self.dummy_env.action_space, spaces.Discrete):
        self.action_space_shape = (self.dummy_env.action_space.n, )
    else:
        self.action_space_shape = tuple(self.dummy_env.action_space.nvec)
    self.dummy_env.close()

    print("Step 2: Visual Observation Space: " + str(visual_observation_space))
    print("Step 2: Vector Observation Space: " + str(vector_observation_space))
    print("Step 2: Action Space Shape: " + str(self.action_space_shape))
    # NOTE(review): action_names is read after the dummy environment was closed -- confirm this stays valid
    print("Step 2: Action Names: " + str(self.dummy_env.action_names))

    # Prepare evaluator if configured
    self.eval = configs["evaluation"]["evaluate"]
    self.eval_interval = configs["evaluation"]["interval"]
    if self.eval:
        print("Step 2b: Initializing evaluator")
        self.evaluator = Evaluator(configs["evaluation"],
                                   configs["environment"], worker_id,
                                   visual_observation_space,
                                   vector_observation_space)

    # Build or load model
    if not configs["model"]["load_model"]:
        print("Step 3: Creating model")
        self.model = OTCModel(configs["model"], visual_observation_space,
                              vector_observation_space,
                              self.action_space_shape, self.use_recurrent,
                              self.hidden_state_size).to(self.device)
    else:
        print("Step 3: Loading model from " + configs["model"]["model_path"])
        self.model = torch.load(configs["model"]["model_path"]).to(self.device)
    self.model.train()

    # Instantiate optimizer
    self.optimizer = optim.AdamW(self.model.parameters(), lr=self.lr_schedule["initial"])

    # Instantiate experience/training data buffer
    self.buffer = Buffer(self.n_workers, self.worker_steps,
                         self.n_mini_batch, visual_observation_space,
                         vector_observation_space, self.action_space_shape,
                         self.use_recurrent, self.hidden_state_size,
                         self.device, self.mini_batch_device)

    # Launch workers
    print("Step 4: Launching training environments of type " + configs["environment"]["type"])
    # Training workers use ids offset by 200 from the trainer's worker_id
    self.workers = [Worker(configs["environment"], worker_id + 200 + i)
                    for i in range(self.n_workers)]

    # Setup initial observations
    if visual_observation_space is not None:
        self.vis_obs = np.zeros(
            (self.n_workers, ) + visual_observation_space.shape,
            dtype=np.float32)
    else:
        self.vis_obs = None
    if vector_observation_space is not None:
        self.vec_obs = np.zeros(
            (self.n_workers, ) + vector_observation_space,
            dtype=np.float32)
    else:
        self.vec_obs = None

    # Setup initial hidden states
    if self.use_recurrent:
        self.hidden_state = torch.zeros(
            (self.n_workers, self.hidden_state_size),
            dtype=torch.float32,
            device=self.device)
    else:
        self.hidden_state = None

    # Reset workers
    for worker in self.workers:
        worker.child.send(("reset", None))
    # Grab initial observations
    for i, worker in enumerate(self.workers):
        vis_obs, vec_obs = worker.child.recv()
        if self.vis_obs is not None:
            self.vis_obs[i] = vis_obs
        if self.vec_obs is not None:
            self.vec_obs[i] = vec_obs
def main():
    """Plays a single episode with an (untrained or loaded) model and optionally renders a video.

    Command line options are parsed with docopt. The environment is launched in
    realtime mode and reset with the given seed. Actions are sampled from the
    model's policy branches until the episode is done. If a command line
    argument containing "--video" was passed, the collected episode data is
    handed to a VideoRecorder. The environment is closed in every case.
    """
    # Docopt command line arguments
    _USAGE = """
    Usage:
        enjoy.py [options]
        enjoy.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --seed=<n>                 The to be played seed of an episode [default: 0].
        --video=<path>             Specify a path for saving a video, if video recording is desired. The file's extension will be set automatically. [default: ./video].
        --framerate=<n>            Specifies the frame rate of a video shall be rendered. [default: 6]
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    seed = int(options["--seed"])
    video_path = options["--video"]
    # NOTE(review): unlike worker-id and seed this is passed on as a string -- confirm VideoRecorder converts it
    frame_rate = options["--framerate"]

    # Determine whether to record a video. A video is only recorded if the video flag is used.
    # docopt always fills in the default path, so the raw arguments are inspected instead.
    record_video = any("--video" in arg for arg in sys.argv)
    if record_video:
        logger.info("Step 0: Video recording enabled. Video will be saved to " + video_path)

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Launch environment
    logger.info("Step 1: Launching environment")
    env = wrap_environment(configs["environment"], worker_id,
                           realtime_mode=True, record_trajectory=record_video)
    try:
        # Retrieve observation space
        visual_observation_space = env.visual_observation_space
        vector_observation_space = env.vector_observation_space
        if isinstance(env.action_space, spaces.Discrete):
            action_space_shape = (env.action_space.n, )
        else:
            action_space_shape = tuple(env.action_space.nvec)

        # Build or load model
        logger.info("Step 2: Creating model")
        model = OTCModel(
            configs["model"], visual_observation_space, vector_observation_space,
            action_space_shape,
            configs["model"].get("recurrence")).to(device)
        if not untrained:
            logger.info("Step 2: Loading model from " + configs["model"]["model_path"])
            checkpoint = load_checkpoint(configs["model"]["model_path"])
            model.load_state_dict(checkpoint["model_state_dict"])
            if "recurrence" in configs["model"]:
                model.set_mean_recurrent_cell_states(checkpoint["hxs"], checkpoint["cxs"])
        model.eval()

        # Reset environment
        logger.info("Step 3: Resetting the environment")
        logger.info("Step 3: Using seed " + str(seed))
        # The seed is written into the reset parameters of the loaded config
        reset_params = configs["environment"]["reset_params"]
        reset_params["seed"] = seed
        vis_obs, vec_obs = env.reset(reset_params)
        done = False

        # Init hidden state (None if not available)
        if "recurrence" in configs["model"]:
            hxs, cxs = model.init_recurrent_cell_states(1, device)
            if configs["model"]["recurrence"]["layer_type"] == "gru":
                recurrent_cell = hxs
            elif configs["model"]["recurrence"]["layer_type"] == "lstm":
                recurrent_cell = (hxs, cxs)
        else:
            recurrent_cell = None

        # Play episode
        logger.info("Step 4: Run single episode in realtime . . .")

        # Store data for video recording
        log_probs = []
        entropies = []
        values = []
        actions = []

        with torch.no_grad():
            while not done:
                # Forward the neural net (observations get a leading dimension of size 1)
                policy, value, recurrent_cell = model(
                    np.expand_dims(vis_obs, 0) if vis_obs is not None else None,
                    np.expand_dims(vec_obs, 0) if vec_obs is not None else None,
                    recurrent_cell, device)

                _actions = []
                probs = []
                entropy = []
                # Sample one action per policy branch
                for action_branch in policy:
                    action = action_branch.sample()
                    _actions.append(action.item())
                    probs.append(action_branch.probs)
                    entropy.append(action_branch.entropy().item())

                # Store data for video recording
                actions.append(_actions)
                log_probs.append(probs)
                entropies.append(entropy)
                values.append(value)

                # Step environment
                vis_obs, vec_obs, _, done, info = env.step(_actions)

        # info stems from the final step of the episode
        logger.info("Episode Reward: " + str(info["reward"]))

        # Complete video data
        if record_video:
            trajectory_data = env.get_episode_trajectory
            trajectory_data["action_names"] = env.action_names
            trajectory_data["actions"] = actions
            trajectory_data["log_probs"] = log_probs
            trajectory_data["entropies"] = entropies
            trajectory_data["values"] = values
            trajectory_data["episode_reward"] = info["reward"]
            trajectory_data["seed"] = seed
            # Init video recorder
            video_recorder = VideoRecorder(video_path, frame_rate)
            # Render and serialize video
            video_recorder.render_video(trajectory_data)
    finally:
        # Close the environment even if loading the model, the episode or the rendering fails
        env.close()