コード例 #1
0
def test_training_TD_for_gridworld(model_class, train=True):
    """Optionally run TD training on a fresh GridWorld, then test the policy.

    Args:
        model_class: Forwarded unchanged as ``model_class`` to
            ``rl.train_reinforcement_strategy_temporal_difference``.
        train: When truthy, the training call (50000 epochs) runs before
            ``rl.test_policy``; when falsy, only ``rl.test_policy`` runs.
    """
    env = GridWorld()
    if train:
        # The trainer's two-element result is unpacked but not used further.
        _policy, _model = rl.train_reinforcement_strategy_temporal_difference(
            model_class=model_class,
            game_obs=env,
            epochs=50000,
        )
    rl.test_policy(env)
コード例 #2
0
def test_training_TD_for_blackjack(model_class):
    """Train a TD strategy on BlackJack, print its tables, then test it.

    Args:
        model_class: Forwarded unchanged as ``model_class`` to
            ``rl.train_reinforcement_strategy_temporal_difference``.

    Returns:
        The ``(policy, model)`` pair produced by the training call.
    """
    blackjack = BlackJack()
    policy, model = rl.train_reinforcement_strategy_temporal_difference(
        epochs=5000, game_obs=blackjack, model_class=model_class)

    # The transposed policy frame must have exactly four columns for the
    # column assignment below to succeed.
    df = pd.DataFrame(policy).T
    df.columns = ['player_value', 'dealer_value', 'decision', 'score']

    # Pivot once, with explicit keywords: pandas >= 2.0 no longer accepts
    # index/columns as positional arguments.
    pivoted = df.pivot(index='player_value', columns='dealer_value')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    policy_Q_table = pivoted['decision']
    print(policy_Q_table)
    policy_Q_score = pivoted['score']
    print(policy_Q_score)

    # TODO: add an IPython notebook 3D graph

    # Test policy
    rl.test_policy(blackjack)

    return policy, model
コード例 #3
0
def test_training_TD_for_blackjack(model_class):
    """Train a TD strategy on BlackJack, print its tables, then test it.

    Args:
        model_class: Forwarded unchanged as ``model_class`` to
            ``rl.train_reinforcement_strategy_temporal_difference``.

    Returns:
        The ``(policy, model)`` pair produced by the training call.
    """
    blackjack = BlackJack()
    policy, model = rl.train_reinforcement_strategy_temporal_difference(
        epochs=5000, game_obs=blackjack, model_class=model_class
    )

    # The transposed policy frame must have exactly four columns for the
    # column assignment below to succeed.
    df = pd.DataFrame(policy).T
    df.columns = ["player_value", "dealer_value", "decision", "score"]

    # Pivot once, with explicit keywords: pandas >= 2.0 no longer accepts
    # index/columns as positional arguments.
    pivoted = df.pivot(index="player_value", columns="dealer_value")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    policy_Q_table = pivoted["decision"]
    print(policy_Q_table)
    policy_Q_score = pivoted["score"]
    print(policy_Q_score)

    # TODO: add an IPython notebook 3D graph

    # Test policy
    rl.test_policy(blackjack)

    return policy, model
コード例 #4
0
def test_training_TD_for_gridworld(model_class, train=True):
    """Test the policy on a new GridWorld, training first unless disabled.

    Args:
        model_class: Passed through as the ``model_class`` keyword of
            ``rl.train_reinforcement_strategy_temporal_difference``.
        train: If truthy, run the 50000-epoch training call before the
            policy test; if falsy, skip straight to ``rl.test_policy``.
    """
    world = GridWorld()
    if train:
        run_training = rl.train_reinforcement_strategy_temporal_difference
        # Result is unpacked as a pair but neither element is used afterwards.
        _trained_policy, _trained_model = run_training(
            epochs=50000, game_obs=world, model_class=model_class)
    rl.test_policy(world)