Example 1
def worker_process(remote: multiprocessing.connection.Connection, env_config,
                   worker_id: int):
    """Initializes the environment and executes its interface.

    Arguments:
        remote {multiprocessing.connection.Connection} -- Parent thread
        env_config {dict} -- The configuration data of the desired environment
        worker_id {int} -- Id for the environment's process. This is necessary for Unity ML-Agents environments, because these operate on different ports.
    """

    # Initialize and wrap the environment
    try:
        env = wrap_environment(env_config, worker_id)
    except KeyboardInterrupt:
        pass

    # Communication interface of the environment thread
    while True:
        try:
            cmd, data = remote.recv()
            if cmd == "step":
                remote.send(env.step(data))
            elif cmd == "reset":
                remote.send(env.reset(data))
            elif cmd == "close":
                remote.send(env.close())
                remote.close()
                break
            else:
                raise NotImplementedError
        # Stop the loop once the parent's end of the pipe is gone or the process is interrupted;
        # a bare except would also swallow the NotImplementedError raised for unknown commands
        except (EOFError, KeyboardInterrupt):
            break
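For context, a minimal sketch of the parent-side counterpart follows. It is an illustration, not the repository's actual implementation: it assumes the command protocol visible above (("step", data), ("reset", params), ("close", None)) and mirrors the worker.child.send(...) / worker.child.recv() usage seen in Examples 4 and 6.

import multiprocessing

class Worker:
    """Parent-side handle that spawns worker_process in a child process (illustrative sketch)."""

    def __init__(self, env_config, worker_id: int):
        # self.child is the parent's end of the pipe; the other end goes to the child process
        self.child, parent = multiprocessing.Pipe()
        self.process = multiprocessing.Process(
            target=worker_process, args=(parent, env_config, worker_id), daemon=True)
        self.process.start()

# Usage mirroring the command protocol handled above:
#   worker = Worker(configs["environment"], worker_id)
#   worker.child.send(("reset", reset_params))
#   vis_obs, vec_obs = worker.child.recv()
#   worker.child.send(("step", actions))
#   step_result = worker.child.recv()
#   worker.child.send(("close", None))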
Example 2
def main():
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --path=<path>              Path to the directory containing the checkpoints to evaluate [default: None].
        --name=<path>              Specifies the full path to save the output file [default: results.res].
    """
    options = docopt(_USAGE)
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    path = options["--path"]
    name = options["--name"]

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    print("Step 1: Creating dummy environment of type " +
          configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    if isinstance(dummy_env.action_space, spaces.Discrete):
        action_space_shape = (dummy_env.action_space.n, )
    else:
        action_space_shape = tuple(dummy_env.action_space.nvec)
    dummy_env.close()

    # Init evaluator
    print("Step 1: Environment Config")
    for k, v in configs["environment"].items():
        print("Step 1: " + str(k) + ": " + str(v))
    print("Step 2: Evaluation Config")
    for k, v in configs["evaluation"].items():
        print("Step 2: " + str(k) + ": " + str(v))
    print("Step 2: Init Evaluator")
    evaluator = Evaluator(configs["evaluation"], configs["environment"],
                          worker_id, visual_observation_space,
                          vector_observation_space)

    # Load checkpoint paths
    print("Step 3: Load Checkpoint Paths")
    checkpoints = get_sorted_checkpoints(path)
    print("Step 3: Number of Loaded Checkpoint Paths: " +
          str(len(checkpoints)))

    # Evaluate checkpoints
    print("Step 4: Start Evaluation . . .")
    print("Progress:")
    results = []
    for index, checkpoint in enumerate(checkpoints, start=1):
        _, res = evaluator.evaluate(torch.load(checkpoint), device)
        results.append(res)
        # Show evaluation progress as a fraction of processed checkpoints
        print(f"\r{index / len(checkpoints):.2f}", end="", flush=True)
    evaluator.close()

    # Save results to file
    print("")
    print("Step 5: Save to File: " + name)
    results = np.asarray(results).reshape(len(checkpoints),
                                          len(configs["evaluation"]["seeds"]),
                                          configs["evaluation"]["n_workers"])
    with open(name, "wb") as outfile:
        pickle.dump(results, outfile)
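The pickled file can be read back with the standard library. A minimal sketch, assuming each stored entry is a scalar episode result; if the evaluator stores episode-info dicts instead, aggregate the relevant field first.

import pickle

# Load the results array written above; shape: (checkpoints, seeds, n_workers)
with open("results.res", "rb") as infile:
    results = pickle.load(infile)

# Mean and standard deviation across seeds and workers for each checkpoint
mean_per_checkpoint = results.mean(axis=(1, 2))
std_per_checkpoint = results.std(axis=(1, 2))
print(mean_per_checkpoint, std_per_checkpoint)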
Example 3
def main():
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --run-id=<path>            Specifies the tag of the tensorboard summaries [default: default].
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    run_id = options["--run-id"]

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    print("Step 1: Creating dummy environment of type " + configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)

    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    if isinstance(dummy_env.action_space, spaces.Discrete):
        action_space_shape = (dummy_env.action_space.n,)
    else:
        action_space_shape = tuple(dummy_env.action_space.nvec)
    dummy_env.close()

    # Build or load model
    if untrained:
        print("Step 2: Creating model")
        model = OTCModel(configs["model"], visual_observation_space,
                         vector_observation_space, action_space_shape,
                         configs["model"]["use_recurrent"],
                         configs["model"]["hidden_state_size"]).to(device)
    else:
        print("Step 2: Loading model from " + configs["model"]["model_path"])
        model = torch.load(configs["model"]["model_path"]).to(device)
    model.eval()

    # Initialize evaluator
    print("Step 3: Initialize evaluator")
    print("Step 3: Number of Workers: " + str(configs["evaluation"]["n_workers"]))
    print("Step 3: Seeds: " + str(configs["evaluation"]["seeds"]))
    print("Step 3: Number of episodes: " + str(len(configs["evaluation"]["seeds"]) * configs["evaluation"]["n_workers"]))
    evaluator = Evaluator(configs["evaluation"], configs["environment"], worker_id, visual_observation_space, vector_observation_space)

    # Evaluate
    print("Step 4: Run evaluation . . .")
    eval_duration, raw_episode_results = evaluator.evaluate(model, device)
    episode_result = _process_episode_info(raw_episode_results)

    # Print results
    print("RESULT: sec={:3}     mean reward={:.2f} std={:.2f}     mean length={:.1f} std={:.2f}".format(
        eval_duration, episode_result["reward_mean"], episode_result["reward_std"], episode_result["length_mean"], episode_result["length_std"]))

    # Close
    print("Step 5: Closing evaluator . . .")
    evaluator.close()
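_process_episode_info is not shown in these excerpts. Based on the keys consumed above ("reward_mean", "reward_std", "length_mean", "length_std"), a plausible sketch is the following, assuming each raw episode entry is a dict with "reward" and "length" fields:

import numpy as np

def _process_episode_info(episode_infos: list) -> dict:
    """Aggregates raw episode information into mean/std statistics (sketch, assumed field layout)."""
    result = {}
    if len(episode_infos) > 0:
        # Derive e.g. "reward_mean"/"reward_std" from a "reward" field, as consumed above
        for key in episode_infos[0].keys():
            result[key + "_mean"] = np.mean([info[key] for info in episode_infos])
            result[key + "_std"] = np.std([info[key] for info in episode_infos])
    return result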
Example 4
    def __init__(self, configs, worker_id, run_id="default", low_mem_fix=False, out_path="./"):
        """Initializes the trainer, the model, the buffer, the evaluator and launches training environments

        Arguments:
            configs {dict} -- The whole set of configurations (e.g. training and environment configs)
            worker_id {int} -- Specifies the offset for the port to communicate with the environment, which is needed for Unity ML-Agents environments (default: {1})
            run_id {string} -- The run_id is used to tag the training runs (directory names to store summaries and checkpoints) (default: {"default"})
            low_mem_fix {bool} -- Determines whethere to do the training/sampling on cpu or gpu. This is necessary for too small GPU memory capacities (default: {False})
        """
        # Handle Ctrl + C event, which aborts and shuts down the training process in a controlled manner
        signal(SIGINT, self._handler)
        # Create directories for storing checkpoints, logs and tensorboard summaries based on the current time and provided run_id
        os.makedirs(out_path + "summaries", exist_ok=True)
        os.makedirs(out_path + "checkpoints", exist_ok=True)
        os.makedirs(out_path + "logs/" + run_id, exist_ok=True)
        timestamp = time.strftime("/%Y%m%d-%H%M%S") + "_" + str(worker_id) + "/"
        self.checkpoint_path = out_path + "checkpoints/" + run_id + timestamp
        os.makedirs(self.checkpoint_path)

        # Setup logger
        logging.basicConfig(level=logging.INFO, handlers=[])
        self.logger = logging.getLogger("train")
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter("%(asctime)s: %(message)s", "%Y-%m-%d %H:%M:%S"))
        path = out_path + "logs/" + run_id + timestamp[:-1] + ".log"
        logfile = logging.FileHandler(path, mode="w")
        self.logger.addHandler(console)
        self.logger.addHandler(logfile)

        # Determine cuda availability
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Init members
        self.worker_id = worker_id
        self.run_id = run_id
        self.low_mem_fix = low_mem_fix
        if self.low_mem_fix:
            self.mini_batch_device = torch.device("cpu")
        else:
            self.mini_batch_device = self.device
        self.configs = configs
        self.resume_at = configs["trainer"]["resume_at"]
        self.gamma = configs["trainer"]["gamma"]
        self.lamda = configs["trainer"]["lamda"]
        self.updates = configs["trainer"]["updates"]
        self.epochs = configs["trainer"]["epochs"]
        self.n_workers = configs["trainer"]["n_workers"]
        self.worker_steps = configs["trainer"]["worker_steps"]
        self.n_mini_batch = configs["trainer"]["n_mini_batch"]
        self.recurrence = configs["model"].get("recurrence", None)
        self.lr_schedule = configs["trainer"]["learning_rate_schedule"]
        self.beta_schedule = configs["trainer"]["beta_schedule"]
        self.cr_schedule = configs["trainer"]["clip_range_schedule"]

        self.batch_size = self.n_workers * self.worker_steps
        self.mini_batch_size = self.batch_size // self.n_mini_batch
        assert (self.batch_size % self.n_mini_batch == 0), "Batch Size divided by number of mini batches has a remainder."
        self.writer = SummaryWriter(out_path + "summaries/" + run_id + timestamp)
        self._write_hyperparameters(configs)

        self.checkpoint_interval = configs["model"]["checkpoint_interval"]

        # Start logging the training setup
        self.logger.info("Step 1: Provided config:")
        for key in configs:
            self.logger.info("Step 1: " + str(key) + ":")
            for k, v in configs[key].items():
                self.logger.info("Step 1: " + str(k) + ": " + str(v))

        self.logger.info("Step 2: Creating dummy environment")
        # Create dummy environment to retrieve the shapes of the observation and action space for further processing
        self.dummy_env = wrap_environment(configs["environment"], worker_id)
        visual_observation_space = self.dummy_env.visual_observation_space
        vector_observation_space = self.dummy_env.vector_observation_space
        if isinstance(self.dummy_env.action_space, spaces.Discrete):
            self.action_space_shape = (self.dummy_env.action_space.n,)
        else:
            self.action_space_shape = tuple(self.dummy_env.action_space.nvec)
        self.logger.info("Step 2: Visual Observation Space: " + str(visual_observation_space))
        self.logger.info("Step 2: Vector Observation Space: " + str(vector_observation_space))
        self.logger.info("Step 2: Action Space Shape: " + str(self.action_space_shape))
        self.logger.info("Step 2: Action Names: " + str(self.dummy_env.action_names))
        # Close the dummy environment only after its action names have been logged
        self.dummy_env.close()

        # Prepare evaluator if configured
        self.eval = configs["evaluation"]["evaluate"]
        self.eval_interval = configs["evaluation"]["interval"]
        if self.eval:
            self.logger.info("Step 2b: Initializing evaluator")
            self.evaluator = Evaluator(configs, worker_id, visual_observation_space, vector_observation_space)

        # Instantiate experience/training data buffer
        self.buffer = Buffer(
            self.n_workers, self.worker_steps, self.n_mini_batch,
            visual_observation_space, vector_observation_space,
            self.action_space_shape, self.recurrence,
            self.device, self.mini_batch_device)

        # Init model
        self.logger.info("Step 3: Creating model")
        self.model = OTCModel(configs["model"], visual_observation_space, vector_observation_space,
                              self.action_space_shape, self.recurrence).to(self.device)

        # Instantiate optimizer
        self.optimizer = optim.AdamW(self.model.parameters(), lr=self.lr_schedule["initial"])

        # Load checkpoint and apply data
        if configs["model"]["load_model"]:
            self.logger.info("Step 3: Loading model from " + configs["model"]["model_path"])
            checkpoint = load_checkpoint(configs["model"]["model_path"])
            self.model.load_state_dict(checkpoint["model_state_dict"])
            if self.recurrence is not None:
                self.model.set_mean_recurrent_cell_states(checkpoint["hxs"], checkpoint["cxs"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            # self.resume_at = checkpoint["update"] + 1

        # Set model to train mode
        self.model.train()

        # Launch workers
        self.logger.info("Step 4: Launching training environments of type " + configs["environment"]["type"])
        self.workers = [Worker(configs["environment"], worker_id + 200 + w) for w in range(self.n_workers)]

        # Setup initial observations
        if visual_observation_space is not None:
            self.vis_obs = np.zeros((self.n_workers,) + visual_observation_space.shape, dtype=np.float32)
        else:
            self.vis_obs = None
        if vector_observation_space is not None:
            self.vec_obs = np.zeros((self.n_workers,) + vector_observation_space, dtype=np.float32)
        else:
            self.vec_obs = None

        # Setup initial recurrent cell
        if self.recurrence is not None:
            hxs, cxs = self.model.init_recurrent_cell_states(self.n_workers, self.mini_batch_device)
            if self.recurrence["layer_type"] == "gru":
                self.recurrent_cell = hxs
            elif self.recurrence["layer_type"] == "lstm":
                self.recurrent_cell = (hxs, cxs)
        else:
            self.recurrent_cell = None

        # Reset workers
        for worker in self.workers:
            worker.child.send(("reset", None))
        # Grab initial observations
        for i, worker in enumerate(self.workers):
            vis_obs, vec_obs = worker.child.recv()
            if self.vis_obs is not None:
                self.vis_obs[i] = vis_obs
            if self.vec_obs is not None:
                self.vec_obs[i] = vec_obs
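The schedule dicts (lr_schedule, beta_schedule, cr_schedule) are only read via their "initial" entry in this excerpt. As an illustration of how such a schedule might be applied per update, here is a minimal decay helper; the "final", "power" and "max_decay_steps" keys are assumptions not confirmed by the excerpt.

def polynomial_decay(initial: float, final: float, max_decay_steps: int, power: float, current_step: int) -> float:
    """Decays a hyperparameter from initial to final over max_decay_steps updates (sketch)."""
    # Return the final value once the decay horizon is exceeded (or if there is nothing to decay)
    if current_step > max_decay_steps or initial == final:
        return final
    # Polynomial interpolation between initial and final; power=1.0 yields linear decay
    return ((initial - final) * ((1 - current_step / max_decay_steps) ** power)) + final

# Hypothetical usage with a schedule dict shaped like self.lr_schedule:
#   lr = polynomial_decay(schedule["initial"], schedule["final"],
#                         schedule["max_decay_steps"], schedule["power"], update)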
Example 5
def main():
    # Docopt command line arguments
    _USAGE = """
    Usage:
        evaluate.py [options]
        evaluate.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --video=<path>             Specify a path for saving videos, if video recording is desired. The files' extension will be set automatically. [default: ./video].
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    video_path = options["--video"]

    # Determine whether to record a video. A video is only recorded if the --video flag is used.
    record_video = False
    for arg in sys.argv:
        if "--video" in arg:
            record_video = True
            logger.info("Step 0: Video recording enabled. Video will be saved to " + video_path)
            break

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create dummy environment to retrieve the shapes of the observation and action space for further processing
    logger.info("Step 1: Creating dummy environment of type " +
                configs["environment"]["type"])
    dummy_env = wrap_environment(configs["environment"], worker_id)
    visual_observation_space = dummy_env.visual_observation_space
    vector_observation_space = dummy_env.vector_observation_space
    if isinstance(dummy_env.action_space, spaces.Discrete):
        action_space_shape = (dummy_env.action_space.n, )
    else:
        action_space_shape = tuple(dummy_env.action_space.nvec)
    dummy_env.close()

    # Build or load model
    logger.info("Step 2: Creating model")
    model = OTCModel(configs["model"], visual_observation_space,
                     vector_observation_space, action_space_shape,
                     configs["model"].get("recurrence", None)).to(device)
    if not untrained:
        logger.info("Step 2: Loading model from " +
                    configs["model"]["model_path"])
        checkpoint = load_checkpoint(configs["model"]["model_path"])
        model.load_state_dict(checkpoint["model_state_dict"])
        if "recurrence" in configs["model"]:
            model.set_mean_recurrent_cell_states(checkpoint["hxs"],
                                                 checkpoint["cxs"])
    model.eval()

    # Initialize evaluator
    logger.info("Step 3: Initialize evaluator")
    logger.info("Step 3: Number of Workers: " +
                str(configs["evaluation"]["n_workers"]))
    logger.info("Step 3: Seeds: " + str(configs["evaluation"]["seeds"]))
    logger.info("Step 3: Number of episodes: " + str(
        len(configs["evaluation"]["seeds"]) *
        configs["evaluation"]["n_workers"]))
    evaluator = Evaluator(configs, worker_id, visual_observation_space,
                          vector_observation_space, video_path, record_video)

    # Evaluate
    logger.info("Step 4: Run evaluation . . .")
    eval_duration, raw_episode_results = evaluator.evaluate(model, device)
    episode_result = _process_episode_info(raw_episode_results)

    # Print results
    logger.info(
        "RESULT: sec={:3}     mean reward={:.2f} std={:.2f}     mean length={:.1f} std={:.2f}"
        .format(eval_duration, episode_result["reward_mean"],
                episode_result["reward_std"], episode_result["length_mean"],
                episode_result["length_std"]))

    # Close
    logger.info("Step 5: Closing evaluator . . .")
    evaluator.close()
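load_checkpoint returns a dict with at least "model_state_dict", "hxs" and "cxs" here, plus "optimizer_state_dict" and "update" in Example 4. A matching save/load pair could look like this; it is a sketch inferred from the keys used above, and the repository's actual checkpoint contents may differ.

import torch

def save_checkpoint(path: str, update: int, model, optimizer, hxs=None, cxs=None) -> None:
    """Writes a checkpoint containing the keys consumed by the loading code above (sketch)."""
    torch.save({
        "update": update,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "hxs": hxs,
        "cxs": cxs,
    }, path)

def load_checkpoint(path: str) -> dict:
    """Reads a checkpoint dict; map_location keeps loading independent of the training device."""
    return torch.load(path, map_location="cpu")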
Example 6
    def __init__(self,
                 configs,
                 worker_id,
                 run_id="default",
                 low_mem_fix=False):
        """Initializes the trainer, the model, the buffer, the evaluator and launches training environments

        Arguments:
            configs {dict} -- The whole set of configurations (e.g. training and environment configs)
            worker_id {int} -- Specifies the offset for the port to communicate with the environment, which is needed for Unity ML-Agents environments (default: {1})
            run_id {string} -- The run_id is used to tag the training runs (directory names to store summaries and checkpoints) (default: {"default"})
            low_mem_fix {bool} -- Determines whethere to do the training/sampling on cpu or gpu. This is necessary for too small GPU memory capacities (default: {False})
        """
        # Handle Ctrl + C event, which aborts and shuts down the training process in a controlled manner
        signal(SIGINT, self.handler)
        # Create directories for storing checkpoints, models and tensorboard summaries based on the current time and provided run_id
        if not os.path.exists("summaries"):
            os.makedirs("summaries")
        if not os.path.exists("checkpoints"):
            os.makedirs("checkpoints")
        timestamp = time.strftime("/%Y%m%d-%H%M%S" + "_" + str(worker_id) +
                                  "/")
        self.checkpoint_path = "checkpoints/" + run_id + timestamp
        os.makedirs(self.checkpoint_path)

        # Determine cuda availability
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Init members
        self.worker_id = worker_id
        self.run_id = run_id
        self.low_mem_fix = low_mem_fix
        if self.low_mem_fix:
            self.mini_batch_device = torch.device("cpu")
        else:
            self.mini_batch_device = self.device
        self.resume_at = configs["trainer"]["resume_at"]
        self.gamma = configs["trainer"]["gamma"]
        self.lamda = configs["trainer"]["lamda"]
        self.updates = configs["trainer"]["updates"]
        self.epochs = configs["trainer"]["epochs"]
        self.n_workers = configs["trainer"]["n_workers"]
        self.worker_steps = configs["trainer"]["worker_steps"]
        self.n_mini_batch = configs["trainer"]["n_mini_batch"]
        self.use_recurrent = configs["model"]["use_recurrent"]
        self.hidden_state_size = configs["model"]["hidden_state_size"]
        self.lr_schedule = configs["trainer"]["learning_rate_schedule"]
        self.beta_schedule = configs["trainer"]["beta_schedule"]
        self.cr_schedule = configs["trainer"]["clip_range_schedule"]

        self.batch_size = self.n_workers * self.worker_steps
        self.mini_batch_size = self.batch_size // self.n_mini_batch
        assert (
            self.batch_size % self.n_mini_batch == 0
        ), "Batch Size divided by number of mini batches has a remainder."
        self.writer = SummaryWriter("summaries/" + run_id + timestamp)
        self.write_hyperparameters(configs["trainer"])

        self.checkpoint_interval = configs["model"]["checkpoint_interval"]

        print("Step 1: Provided config:")
        for key in configs:
            print("Step 1: " + str(key) + ":")
            for k, v in configs[key].items():
                print("Step 1: " + str(k) + ": " + str(v))

        print("Step 2: Creating dummy environment")
        # Create dummy environment to retrieve the shapes of the observation and action space for further processing
        self.dummy_env = wrap_environment(configs["environment"], worker_id)

        visual_observation_space = self.dummy_env.visual_observation_space
        vector_observation_space = self.dummy_env.vector_observation_space
        if isinstance(self.dummy_env.action_space, spaces.Discrete):
            self.action_space_shape = (self.dummy_env.action_space.n, )
        else:
            self.action_space_shape = tuple(self.dummy_env.action_space.nvec)
        print("Step 2: Visual Observation Space: " +
              str(visual_observation_space))
        print("Step 2: Vector Observation Space: " +
              str(vector_observation_space))
        print("Step 2: Action Space Shape: " + str(self.action_space_shape))
        print("Step 2: Action Names: " + str(self.dummy_env.action_names))
        # Close the dummy environment only after its action names have been printed
        self.dummy_env.close()

        # Prepare evaluator if configured
        self.eval = configs["evaluation"]["evaluate"]
        self.eval_interval = configs["evaluation"]["interval"]
        if self.eval:
            print("Step 2b: Initializing evaluator")
            self.evaluator = Evaluator(configs["evaluation"],
                                       configs["environment"], worker_id,
                                       visual_observation_space,
                                       vector_observation_space)

        # Build or load model
        if not configs["model"]["load_model"]:
            print("Step 3: Creating model")
            self.model = OTCModel(configs["model"], visual_observation_space,
                                  vector_observation_space,
                                  self.action_space_shape, self.use_recurrent,
                                  self.hidden_state_size).to(self.device)
        else:
            print("Step 3: Loading model from " +
                  configs["model"]["model_path"])
            self.model = torch.load(configs["model"]["model_path"]).to(
                self.device)
        self.model.train()

        # Instantiate optimizer
        self.optimizer = optim.AdamW(self.model.parameters(),
                                     lr=self.lr_schedule["initial"])
        # Instantiate experience/training data buffer
        self.buffer = Buffer(self.n_workers, self.worker_steps,
                             self.n_mini_batch, visual_observation_space,
                             vector_observation_space, self.action_space_shape,
                             self.use_recurrent, self.hidden_state_size,
                             self.device, self.mini_batch_device)

        # Launch workers
        print("Step 4: Launching training environments of type " +
              configs["environment"]["type"])
        self.workers = []
        for i in range(self.n_workers):
            # Offset the communication port of each worker
            self.workers.append(Worker(configs["environment"], worker_id + 200 + i))

        # Setup initial observations
        if visual_observation_space is not None:
            self.vis_obs = np.zeros(
                (self.n_workers, ) + visual_observation_space.shape,
                dtype=np.float32)
        else:
            self.vis_obs = None
        if vector_observation_space is not None:
            self.vec_obs = np.zeros(
                (self.n_workers, ) + vector_observation_space,
                dtype=np.float32)
        else:
            self.vec_obs = None

        # Setup initial hidden states
        if self.use_recurrent:
            self.hidden_state = torch.zeros(
                (self.n_workers, self.hidden_state_size),
                dtype=torch.float32,
                device=self.device)
        else:
            self.hidden_state = None

        # Reset workers
        for worker in self.workers:
            worker.child.send(("reset", None))
        # Grab initial observations
        for i, worker in enumerate(self.workers):
            vis_obs, vec_obs = worker.child.recv()
            if self.vis_obs is not None:
                self.vis_obs[i] = vis_obs
            if self.vec_obs is not None:
                self.vec_obs[i] = vec_obs
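Tying this constructor back to the worker protocol of Example 1: after the reset handshake above, each training iteration would exchange actions and observations with the workers roughly as sketched below. This is an illustration, not the repository's sampling loop; the unpacking of the step result follows env.step in Example 7.

def step_workers(workers, actions):
    """Steps all workers in parallel via the ("step", data) protocol from Example 1 (sketch)."""
    # Send all actions first so the environments step concurrently ...
    for worker, action in zip(workers, actions):
        worker.child.send(("step", action))
    # ... then collect the results; the unpacking follows env.step in Example 7
    results = []
    for worker in workers:
        vis_obs, vec_obs, reward, done, info = worker.child.recv()
        results.append((vis_obs, vec_obs, reward, done, info))
    return results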
Example 7
def main():
    # Docopt command line arguments
    _USAGE = """
    Usage:
        enjoy.py [options]
        enjoy.py --help

    Options:
        --config=<path>            Path of the Config file [default: ./configs/default.yaml].
        --untrained                Whether an untrained model should be used [default: False].
        --worker-id=<n>            Sets the port for each environment instance [default: 2].
        --seed=<n>                 The seed of the episode to be played [default: 0].
        --video=<path>             Specify a path for saving a video, if video recording is desired. The file's extension will be set automatically. [default: ./video].
        --framerate=<n>            Specifies the frame rate at which the video shall be rendered [default: 6].
    """
    options = docopt(_USAGE)
    untrained = options["--untrained"]
    config_path = options["--config"]
    worker_id = int(options["--worker-id"])
    seed = int(options["--seed"])
    video_path = options["--video"]
    frame_rate = int(options["--framerate"])

    # Determine whether to record a video. A video is only recorded if the --video flag is used.
    record_video = False
    for arg in sys.argv:
        if "--video" in arg:
            record_video = True
            logger.info("Step 0: Video recording enabled. Video will be saved to " + video_path)
            break

    # Load environment, model, evaluation and training parameters
    configs = YamlParser(config_path).get_config()

    # Determine cuda availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Launch environment
    logger.info("Step 1: Launching environment")
    env = wrap_environment(configs["environment"],
                           worker_id,
                           realtime_mode=True,
                           record_trajectory=record_video)
    # Retrieve observation space
    visual_observation_space = env.visual_observation_space
    vector_observation_space = env.vector_observation_space
    if isinstance(env.action_space, spaces.Discrete):
        action_space_shape = (env.action_space.n, )
    else:
        action_space_shape = tuple(env.action_space.nvec)

    # Build or load model
    logger.info("Step 2: Creating model")
    model = OTCModel(configs["model"], visual_observation_space,
                     vector_observation_space, action_space_shape,
                     configs["model"].get("recurrence", None)).to(device)
    if not untrained:
        logger.info("Step 2: Loading model from " +
                    configs["model"]["model_path"])
        checkpoint = load_checkpoint(configs["model"]["model_path"])
        model.load_state_dict(checkpoint["model_state_dict"])
        if "recurrence" in configs["model"]:
            model.set_mean_recurrent_cell_states(checkpoint["hxs"],
                                                 checkpoint["cxs"])
    model.eval()

    # Reset environment
    logger.info("Step 3: Resetting the environment")
    logger.info("Step 3: Using seed " + str(seed))
    reset_params = configs["environment"]["reset_params"]
    reset_params["seed"] = seed
    vis_obs, vec_obs = env.reset(reset_params)
    done = False

    # Init hidden state (None if not available)
    if "recurrence" in configs["model"]:
        hxs, cxs = model.init_recurrent_cell_states(1, device)
        if configs["model"]["recurrence"]["layer_type"] == "gru":
            recurrent_cell = hxs
        elif configs["model"]["recurrence"]["layer_type"] == "lstm":
            recurrent_cell = (hxs, cxs)
    else:
        recurrent_cell = None

    # Play episode
    logger.info("Step 4: Run single episode in realtime . . .")

    # Store data for video recording
    log_probs = []
    entropies = []
    values = []
    actions = []

    with torch.no_grad():
        while not done:
            # Forward the neural net
            policy, value, recurrent_cell = model(
                np.expand_dims(vis_obs, 0) if vis_obs is not None else None,
                np.expand_dims(vec_obs, 0) if vec_obs is not None else None,
                recurrent_cell, device)

            _actions = []
            probs = []
            entropy = []
            # Sample action
            for action_branch in policy:
                action = action_branch.sample()
                _actions.append(action.item())
                probs.append(action_branch.probs)
                entropy.append(action_branch.entropy().item())

            # Store data for video recording
            actions.append(_actions)
            log_probs.append(probs)
            entropies.append(entropy)
            values.append(value)

            # Step environment
            vis_obs, vec_obs, _, done, info = env.step(_actions)

    logger.info("Episode Reward: " + str(info["reward"]))

    # Complete video data
    if record_video:
        trajectory_data = env.get_episode_trajectory
        trajectory_data["action_names"] = env.action_names
        trajectory_data["actions"] = actions
        trajectory_data["log_probs"] = log_probs
        trajectory_data["entropies"] = entropies
        trajectory_data["values"] = values
        trajectory_data["episode_reward"] = info["reward"]
        trajectory_data["seed"] = seed
        # Init video recorder
        video_recorder = VideoRecorder(video_path, frame_rate)
        # Render and serialize video
        video_recorder.render_video(trajectory_data)

    env.close()
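Each of these scripts is driven by docopt, so the excerpt presumably ends with the usual entry-point guard (assumed, not shown in the source):

if __name__ == "__main__":
    main()

# Hypothetical invocation:
#   python enjoy.py --config=./configs/default.yaml --seed=1001 --video=./videos/episode --framerate=10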