default=0, type=int, help="Render some episodes.") parser.add_argument("--threads", default=32, type=int, help="Maximum number of threads to use.") parser.add_argument("--evaluate", default=True, type=bool, help="Run evaluation phase.") args = parser.parse_args() # Create the environment env = cart_pole_pixels_evaluator.environment() # Construct the network network = Network(threads=args.threads) network.construct(args, env.state_shape, env.actions) # Load the checkpoint if required if args.checkpoint: # Try extract it from embedded_data try: import embedded_data_cart_pole_pixels_rudolf_ha_reinforce_baseline embedded_data_cart_pole_pixels_rudolf_ha_reinforce_baseline.extract( ) # print("embedded_data extracted") except: pass
# ################################################################################################################## args = parser.parse_args([] if "__file__" not in globals() else None) # Fix random seeds and threads np.random.seed(args.seed) tf.random.set_seed(args.seed) tf.config.threading.set_inter_op_parallelism_threads(args.threads) tf.config.threading.set_intra_op_parallelism_threads(args.threads) # Report only errors by default if not args.verbose: os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # Create the environment env = cart_pole_pixels_evaluator.environment(seed=args.seed) possible_actions = list(range(env.actions)) # Construct the network network = Network(env, args) # Training for _ in range(args.episodes // args.batch_size): batch_states, batch_actions, batch_returns = [], [], [] # Batch over multiple episodes (failed / finished) for _ in range(args.batch_size): # Perform episode states, actions, rewards = [], [], [] state, done = env.reset(), False while not done: