default=0,
                        type=int,
                        help="Render some episodes.")
    parser.add_argument("--threads",
                        default=32,
                        type=int,
                        help="Maximum number of threads to use.")

    def _str_to_bool(text):
        """Convert a command-line token into a real boolean.

        `type=bool` would call `bool(text)`, which is True for every
        non-empty string, so `--evaluate False` could never switch the
        evaluation phase off. This converter recognises the usual spellings
        (case-insensitively) and rejects anything else.

        Raises:
            ValueError: if `text` is not a recognised boolean spelling;
                argparse reports this to the user as an invalid argument value.
        """
        token = text.strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        # The empty string stays False, matching what `bool("")` returned.
        if token in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise ValueError("not a boolean value: {!r}".format(text))

    parser.add_argument("--evaluate",
                        default=True,
                        type=_str_to_bool,
                        help="Run evaluation phase.")
    args = parser.parse_args()

    # Create the environment (no seed is passed to this call).
    # NOTE(review): `env` is rebound further down in this script by a second,
    # seeded `environment(seed=args.seed)` call -- confirm whether this first
    # instance is still needed.
    env = cart_pole_pixels_evaluator.environment()

    # Construct the network from the thread count, then build it with the
    # parsed arguments and the environment's `state_shape` and `actions`.
    # NOTE(review): `network` is rebound later via `Network(env, args)`, a
    # different constructor call shape from the one used here -- verify that
    # both calls match the `Network` signature, or that this block is not a
    # leftover from another version of the script.
    network = Network(threads=args.threads)
    network.construct(args, env.state_shape, env.actions)

    # Unpack the embedded checkpoint data when `args.checkpoint` is set.
    if args.checkpoint:
        # Best effort: the embedded-data module may be missing or its
        # extraction may fail; either way the script carries on without it.
        try:
            import embedded_data_cart_pole_pixels_rudolf_ha_reinforce_baseline
            embedded_data_cart_pole_pixels_rudolf_ha_reinforce_baseline.extract()
        except Exception:
            # `Exception` instead of a bare `except:` so that KeyboardInterrupt
            # and SystemExit still propagate and the run can be aborted.
            pass
    # ##################################################################################################################

    # Parse the arguments again: with no `__file__` in the module globals
    # (presumably an interactive / notebook session -- confirm) an empty
    # argument list is parsed, otherwise `None` makes argparse read sys.argv.
    no_script_file = "__file__" not in globals()
    args = parser.parse_args([] if no_script_file else None)

    # Seed NumPy and TensorFlow from the same command-line seed.
    run_seed = args.seed
    np.random.seed(run_seed)
    tf.random.set_seed(run_seed)

    # Apply the requested thread limit to both TensorFlow thread pools.
    tf_threading = tf.config.threading
    tf_threading.set_inter_op_parallelism_threads(args.threads)
    tf_threading.set_intra_op_parallelism_threads(args.threads)

    # Unless verbose output was requested, set TensorFlow's C++ log level to "3".
    if not args.verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

    # Build the seeded environment and enumerate its action indices.
    env = cart_pole_pixels_evaluator.environment(seed=run_seed)
    possible_actions = [*range(env.actions)]

    # Build the network for this environment and argument set.
    network = Network(env, args)

    # Training
    for _ in range(args.episodes // args.batch_size):
        batch_states, batch_actions, batch_returns = [], [], []

        # Batch over multiple episodes (failed / finished)
        for _ in range(args.batch_size):
            # Perform episode
            states, actions, rewards = [], [], []
            state, done = env.reset(), False
            while not done: