                    default=0.05, type=float, help="Learning rate.")
parser.add_argument("--alpha_final", default=0.001, type=float, help="Final learning rate.")
parser.add_argument("--evaluate", default=False, action="store_true", help="Run evaluation phase.")
args = parser.parse_args()

# Create the environment
env = car_racing_evaluator.environment()

# Discretize the continuous action space (steer, gas, brake) into a fixed
# set of composite actions, one per combination of the component values.
discrete_steer = [-1, 0, 1]
discrete_gas = [0, 1]
discrete_brake = [0, 1]
discretized_actions = np.array(
    list(itertools.product(discrete_steer, discrete_gas, discrete_brake)))
action_size = len(discretized_actions)

# Construct the network
network = Network()
network.construct(args, env.state_shape, action_size)

# Training
for _ in range(args.episodes // args.batch_size):
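    # A minimal sketch of one Q-learning update step, assuming a replay
    # buffer `replay_buffer` (e.g. a collections.deque of
    # (state, action, reward, done, next_state) tuples, filled by an
    # epsilon-greedy interaction loop not shown here) and hypothetical
    # `Network.predict`/`Network.train` methods; adapt the names to the
    # actual Network interface.
    indices = np.random.choice(len(replay_buffer), size=args.batch_size)
    batch = [replay_buffer[i] for i in indices]
    states, actions, rewards, dones, next_states = map(np.array, zip(*batch))

    # One-step targets: r + gamma * max_a' Q(s', a') for non-terminal
    # transitions, r alone for terminal ones. `actions` are assumed to be
    # integer indices into `discretized_actions`.
    q_values = network.predict(states)
    next_q = network.predict(next_states)
    targets = rewards + (1 - dones.astype(np.float32)) * args.gamma * next_q.max(axis=1)
    q_values[np.arange(args.batch_size), actions] = targets

    network.train(states, q_values)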
type=float, help="Final exploration factor.") parser.add_argument("--gamma", default=None, type=float, help="Discounting factor.") args = parser.parse_args() # Fix random seeds and number of threads np.random.seed(42) tf.random.set_seed(42) tf.config.threading.set_inter_op_parallelism_threads(args.threads) tf.config.threading.set_intra_op_parallelism_threads(args.threads) # Create the environment env = car_racing_evaluator.environment(args.frame_skip) # TODO: Implement a variation to Deep Q Network algorithm. # # Example: How to perform an episode with "always gas" agent. state, done = env.reset(), False while not done: if args.render_each and (env.episode + 1) % args.render_each == 0: env.render() action = [0, 1, 0] next_state, reward, done, _ = env.step(action) # After training (or loading the model), you should run the evaluation: while True: state, done = env.reset(True), False