Example #1
    FREEZE_INTERVAL = 1000
    DETERMINISTIC = True


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_norm,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.update_rule, rng)

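    # --- Instantiate policies (epsilon-greedy for training, greedy for testing) ---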
    train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1)
    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        qnetwork,
                        parameters.replay_memory_size,
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng)
Example #2
    FREEZE_INTERVAL = 1000
    DETERMINISTIC = True


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    
    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()
    
    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng)
    
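    # --- Instantiate policies (epsilon-greedy for training, greedy for testing) ---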
    train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1)
    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)
Example #3
    FREEZE_INTERVAL = 1000
    DETERMINISTIC = True


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.network_type, parameters.update_rule,
                          parameters.batch_accumulator, rng)

    # --- Instantiate agent ---
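    # max(...) below is the largest history length among the environment's inputs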
    agent = NeuralAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)
Example #4
    FREEZE_INTERVAL = 1000
    DETERMINISTIC = True


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    
    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()
    
    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_delta,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.network_type,
        parameters.update_rule,
        parameters.batch_accumulator,
        rng)