import ray

# check_learning_achieved, print_metrics_available and the postprocessing
# module are assumed to be imported from marltoolbox at the top of the
# original test module; those imports are not shown in these snippets.


def test_ltft_ipd():
    from marltoolbox.experiments.rllib_api.ltft_various_env import main
    ray.shutdown()
    tune_analysis_self_play, tune_analysis_against_opponent = main(
        debug=False,
        env="IteratedPrisonersDilemma",
        train_n_replicates=1,
        against_naive_opp=True)
    print_metrics_available(tune_analysis_self_play)
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=-42)
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.9,
        metric="custom_metrics.CC_freq/player_row_mean")
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.9,
        metric="custom_metrics.CC_freq/player_col_mean")
    print_metrics_available(tune_analysis_against_opponent)
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        max_=-75)
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        min_=0.9,
        metric="custom_metrics.DD_freq/player_row_mean")
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        min_=0.9,
        metric="custom_metrics.DD_freq/player_col_mean")


def test_ppo_asym_coin_game():
    from marltoolbox.examples.rllib_api.ppo_coin_game import main
    ray.shutdown()
    tune_analysis = main(debug=False, stop_iters=200)
    print_metrics_available(tune_analysis)
    check_learning_achieved(
        tune_results=tune_analysis, min_=15)
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.30,
        metric="custom_metrics.pick_speed/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.30,
        metric="custom_metrics.pick_speed/player_blue_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.40,
        max_=0.60,
        metric="custom_metrics.pick_own_color/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.40,
        max_=0.60,
        metric="custom_metrics.pick_own_color/player_blue_mean")


def test_amtft_coin_game():
    from marltoolbox.experiments.rllib_api.amtft_various_env import main
    ray.shutdown()
    tune_analysis_per_welfare, analysis_metrics_per_mode = main(
        debug=False, train_n_replicates=1, filter_utilitarian=False,
        env="CoinGame")
    for welfare_name, tune_analysis in tune_analysis_per_welfare.items():
        print("welfare_name", welfare_name)
        print_metrics_available(tune_analysis)
        check_learning_achieved(
            tune_results=tune_analysis,
            min_=40)
        check_learning_achieved(
            tune_results=tune_analysis,
            min_=0.25,
            metric="custom_metrics.pick_speed/player_red_mean")


def test_pg_ipd():
    from marltoolbox.examples.rllib_api.pg_ipd import main
    # Restart Ray defensively in case the ray connection is lost.
    ray.shutdown()
    tune_analysis = main(debug=False)
    print_metrics_available(tune_analysis)
    check_learning_achieved(
        tune_results=tune_analysis,
        max_=-75)
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.9,
        metric="custom_metrics.DD_freq/player_row_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.9,
        metric="custom_metrics.DD_freq/player_col_mean")


def test_ltft_coin_game():
    from marltoolbox.experiments.rllib_api.ltft_various_env import main
    ray.shutdown()
    tune_analysis_self_play, tune_analysis_against_opponent = main(
        debug=False, env="CoinGame", train_n_replicates=1,
        against_naive_opp=True)
    print_metrics_available(tune_analysis_self_play)
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=50)
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_blue_mean")
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.9,
        metric="custom_metrics.pick_own_color/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis_self_play,
        min_=0.9,
        metric="custom_metrics.pick_own_color/player_blue_mean")
    print_metrics_available(tune_analysis_against_opponent)
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        max_=20)
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_blue_mean")
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        max_=0.6,
        metric="custom_metrics.pick_own_color/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis_against_opponent,
        max_=0.6,
        metric="custom_metrics.pick_own_color/player_blue_mean")


def test_amtft_ipd():
    from marltoolbox.experiments.rllib_api.amtft_various_env import main
    ray.shutdown()
    tune_analysis_per_welfare, analysis_metrics_per_mode = main(
        debug=False, train_n_replicates=1, filter_utilitarian=False,
        env="IteratedPrisonersDilemma")
    for welfare_name, tune_analysis in tune_analysis_per_welfare.items():
        print("welfare_name", welfare_name)
        print_metrics_available(tune_analysis)
        check_learning_achieved(
            tune_results=tune_analysis, min_=-204)
        check_learning_achieved(
            tune_results=tune_analysis,
            min_=0.9,
            metric="custom_metrics.CC_freq/player_row_mean"
        )
        check_learning_achieved(
            tune_results=tune_analysis,
            min_=0.9,
            metric="custom_metrics.CC_freq/player_col_mean"
        )


def test_dqn_wt_utilitarian_welfare_coin_game():
    from marltoolbox.examples.rllib_api.dqn_wt_welfare import main
    ray.shutdown()
    tune_analysis = main(debug=False)
    print_metrics_available(tune_analysis)
    check_learning_achieved(
        tune_results=tune_analysis, min_=50)
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.3,
        metric="custom_metrics.pick_speed/player_blue_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.95,
        metric="custom_metrics.pick_own_color/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.95,
        metric="custom_metrics.pick_own_color/player_blue_mean")


def test_dqn_wt_inequity_aversion_welfare_coin_game():
    from marltoolbox.examples.rllib_api.dqn_wt_welfare import main
    ray.shutdown()
    tune_analysis = main(debug=False,
                         welfare=postprocessing.WELFARE_INEQUITY_AVERSION)
    print_metrics_available(tune_analysis)
    check_learning_achieved(
        tune_results=tune_analysis, min_=50)
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.25,
        metric="custom_metrics.pick_speed/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.25,
        metric="custom_metrics.pick_speed/player_blue_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.9,
        metric="custom_metrics.pick_own_color/player_red_mean")
    check_learning_achieved(
        tune_results=tune_analysis,
        min_=0.9,
        metric="custom_metrics.pick_own_color/player_blue_mean")
Example No. 9
def test_adaptive_mechanism_design_tune_class_api_wt_rllib_policy():
    from marltoolbox.examples.tune_class_api.amd import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(debug=True, use_rllib_policy=True)
Example No. 10
def test_l1br_lola_pg_tune_class_api():
    from marltoolbox.examples.tune_class_api.l1br_lola_pg import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(debug=True)
Example No. 11
def test_lola_pg_tune_fn_api():
    from marltoolbox.examples.tune_function_api.lola_pg_official import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(debug=True)
Example No. 12
def test_l1br_amtft():
    from marltoolbox.examples.rllib_api.l1br_amtft import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(debug=True)
Example No. 13
def test_inequity_aversion():
    from marltoolbox.examples.rllib_api.inequity_aversion import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(debug=True)
Example No. 14
def test_inequity_aversion():
    from marltoolbox.examples.rllib_api.inequity_aversion import main

    ray.shutdown()
    main(debug=True)
Example No. 15
def test_amtft_asym_coin_game():
    from marltoolbox.experiments.rllib_api.amtft_various_env import main

    ray.shutdown()
    main(debug=True, env="AsymCoinGame")
Example No. 16
def test_ltft_ipd():
    from marltoolbox.examples.rllib_api.ltft_ipd import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    tune_analysis_self_play, tune_analysis_naive_opponent = main(debug=False)
    check_learning_achieved(tune_results=tune_analysis_self_play, reward=-42, min=True)
    check_learning_achieved(tune_results=tune_analysis_naive_opponent, reward=-78, max=True)
Example No. 17
def test_ppo_asym_coin_game():
    from marltoolbox.examples.rllib_api.ppo_asymmetric_coin_game import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    tune_analysis = main(debug=False, stop_iters=70)
    check_learning_achieved(tune_results=tune_analysis, reward=20, min=True)
Example No. 18
def test_pg_ipd():
    from marltoolbox.examples.rllib_api.pg_ipd import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    tune_analysis = main(debug=False)
    check_learning_achieved(tune_results=tune_analysis, reward=-75, max=True)
Example No. 19
def test_l1br_amtft_coin_game():
    from marltoolbox.experiments.rllib_api.l1br_amtft import main

    ray.shutdown()
    main(debug=True, env="CoinGame")
Example No. 20
def test_l1br_amtft_iasymbos():
    from marltoolbox.experiments.rllib_api.l1br_amtft import main

    ray.shutdown()
    main(debug=True, env="IteratedAsymBoS")
Example No. 21
def test_l1br_amtft_ipd():
    from marltoolbox.experiments.rllib_api.l1br_amtft import main

    ray.shutdown()
    main(debug=True, env="IteratedPrisonersDilemma")
Example No. 22
def test_ltft_coin_game():
    from marltoolbox.experiments.rllib_api.ltft_various_env import main

    ray.shutdown()
    main(debug=True, env="CoinGame", train_n_replicates=1)
Example No. 23
def test_ltft_ipd():
    from marltoolbox.experiments.rllib_api.ltft_various_env import main

    ray.shutdown()
    main(debug=True, env="IteratedPrisonersDilemma", train_n_replicates=1)
Example No. 24
def test_pg_ipd():
    from marltoolbox.examples.rllib_api.pg_ipd import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(stop_iters=10, tf=False, debug=True)
Example No. 25
def test_dqn_coin_game():
    from marltoolbox.examples.rllib_api.dqn_coin_game import main

    ray.shutdown()
    main(debug=True)
Example No. 26
def test_amtft_mixed_motive_coin_game():
    from marltoolbox.experiments.rllib_api.amtft_various_env import main

    ray.shutdown()
    main(debug=True, env="MixedMotiveCoinGame")
Example No. 27
def test_amtft_ipd():
    from marltoolbox.examples.rllib_api.amtft_various_env import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    tune_analysis_per_welfare, analysis_metrics_per_mode = main(
        debug=False, train_n_replicates=1, filter_utilitarian=False)
    for welfare_name, tune_analysis in tune_analysis_per_welfare.items():
        check_learning_achieved(tune_results=tune_analysis, reward=-204, min=True)
Example No. 28
def test_adaptive_mechanism_design_tune_class_api_wt_rllib_policy():
    from marltoolbox.experiments.tune_class_api.amd import main

    ray.shutdown()
    main(debug=True, use_rllib_policy=True)
Example No. 29
def test_ppo_asym_coin_game():
    from marltoolbox.examples.rllib_api.ppo_asymmetric_coin_game import main
    ray.shutdown()  # Restart Ray defensively in case the ray connection is lost.
    main(stop_iters=3, tf=False, debug=True)
Example No. 30
def test_l1br_lola_pg_tune_class_api():
    from marltoolbox.experiments.tune_class_api.l1br_lola_pg import main

    ray.shutdown()
    main(debug=True)
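
All of the snippets above are plain pytest functions, so any one of them can be run in isolation; a possible invocation is sketched below (the module path tests/test_examples.py is hypothetical).

import pytest

# Run a single smoke test with quiet output; point the node ID at the real test module.
pytest.main(["-q", "tests/test_examples.py::test_pg_ipd"])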