# Example #1
                        default=0.05,
                        type=float,
                        help="Learning rate.")
    # NOTE(review): presumably the terminal value of a learning-rate decay
    # schedule that starts at --alpha above -- confirm against the training
    # loop (not shown in this excerpt).
    parser.add_argument("--alpha_final",
                        default=0.001,
                        type=float,
                        help="Final learning rate.")

    # BUG FIX: argparse's `type=bool` is a well-known pitfall -- bool() is
    # applied to the raw string, and bool("False") is True, so ANY non-empty
    # value on the command line (including "--evaluate False") enabled
    # evaluation.  Parse the string explicitly instead; the default remains
    # False and args.evaluate is still a plain bool for downstream code.
    parser.add_argument("--evaluate",
                        default=False,
                        type=lambda s: s.lower() in ("1", "true", "yes"),
                        help="Run evaluation phase.")
    args = parser.parse_args()

    # Create the environment
    env = car_racing_evaluator.environment()

    # Construct the network.
    # The continuous (steer, gas, brake) action space is discretized into the
    # Cartesian product of a few representative values per component, giving
    # 3 * 2 * 2 = 12 discrete actions of shape (12, 3).
    discrete_steer = [-1, 0, 1]
    discrete_gas = [0, 1]
    discrete_brake = [0, 1]
    # list(product(...)) replaces the original identity comprehension
    # `[x for x in itertools.product(...)]` -- same result, clearer intent.
    discretized_actions = np.array(
        list(itertools.product(discrete_steer, discrete_gas, discrete_brake)))
    action_size = len(discretized_actions)
    network = Network()
    network.construct(args, env.state_shape, action_size)

    # Training: one iteration per batch of episodes; total work covers
    # roughly args.episodes episodes (integer division drops any remainder).
    for _ in range(args.episodes // args.batch_size):
# Example #2
                        type=float,
                        help="Final exploration factor.")
    # NOTE(review): default=None means gamma is unset unless supplied on the
    # command line -- looks like a fill-in-the-blank template value; confirm
    # the training code handles (or requires) an explicit --gamma.
    parser.add_argument("--gamma",
                        default=None,
                        type=float,
                        help="Discounting factor.")
    args = parser.parse_args()

    # Fix random seeds and number of threads for reproducible runs.
    np.random.seed(42)
    tf.random.set_seed(42)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create the environment
    env = car_racing_evaluator.environment(args.frame_skip)

    # TODO: Implement a variation to Deep Q Network algorithm.
    #
    # Example: How to perform an episode with "always gas" agent.
    state, done = env.reset(), False
    while not done:
        # Render every args.render_each-th episode (0/None disables rendering).
        if args.render_each and (env.episode + 1) % args.render_each == 0:
            env.render()

        # Fixed action [steer=0, gas=1, brake=0] -- the "always gas" agent.
        # state/next_state/reward are unused by this placeholder policy.
        action = [0, 1, 0]
        next_state, reward, done, _ = env.step(action)

    # After training (or loading the model), you should run the evaluation:
    while True:
        # NOTE(review): env.reset(True) presumably switches the environment
        # into evaluation mode -- confirm against the evaluator's API.
        state, done = env.reset(True), False