Esempio n. 1
0
def qlearn_learn(nplayers, num_card_values, num_cards, agent, learn_trials, test_trials, featureExtractor=qlearn.bsFeatureExtractor, explorationProb=0.2, maxIters=1000, dishonesty_list=None, confidence_list=None, learn_list=None, verbose=False):
    """Train a Q-learning agent on a BS game, then evaluate it greedily.

    A ``play_game.BSGame`` is created, every seat is given the ``decision``
    callable of a ``policy.DishonestPolicy``, and a
    ``qlearn.QLearningAlgorithm`` is run through ``simulators.rlsimulate``
    twice: first for ``learn_trials`` trials with exploration enabled, then,
    after the win counters are reset and exploration is set to 0, for
    ``test_trials`` trials. Progress and the resulting win rate are printed.

    Args:
        nplayers: Number of players; also the length of the default
            per-player lists.
        num_card_values: Length of the list passed to ``BSGame``.
        num_cards: Value repeated ``num_card_values`` times in that list
            (presumably the number of cards per value -- confirm against
            ``BSGame``).
        agent: Passed to ``BSGame`` and used as the index into ``game.wins``
            when reporting the win rate.
        learn_trials: ``numTrials`` for the learning simulation.
        test_trials: ``numTrials`` for the evaluation simulation.
        featureExtractor: Feature extractor handed to ``QLearningAlgorithm``.
        explorationProb: Exploration probability used while learning.
        maxIters: Forwarded as ``maxIterations`` to both simulations.
        dishonesty_list: Per-player first argument for ``DishonestPolicy``;
            a falsy value means ``[0.1] * nplayers``.
        confidence_list: Per-player ``confidence``; a falsy value means
            ``[1] * nplayers``.
        learn_list: Per-player ``learn`` flag; a falsy value means
            ``[False] * nplayers``.
        verbose: Forwarded to the evaluation simulation only; learning always
            runs with ``verbose=False``.

    Returns:
        The trained ``QLearningAlgorithm`` with ``explorationProb`` set to 0.
    """
    # Single pre-formatted strings keep the output identical to the original
    # Python 2 print statements while remaining valid Python 3 syntax.
    print("Running qlearning as agent %s ." % (agent,))
    game = play_game.BSGame(nplayers, [num_cards] * num_card_values, agent, verbose=0)

    if not dishonesty_list:
        dishonesty_list = [0.1] * nplayers
    if not confidence_list:
        confidence_list = [1] * nplayers
    if not learn_list:
        learn_list = [False] * nplayers
    print("Players have dishonesty %s" % (dishonesty_list,))

    game.setPolicies([
        policy.DishonestPolicy(
            game,
            dishonesty_list[seat],
            confidence=confidence_list[seat],
            learn=learn_list[seat],
        ).decision
        for seat in range(nplayers)
    ])

    qlearning = qlearn.QLearningAlgorithm(game.actions, game.discount(), featureExtractor, explorationProb=explorationProb)
    print("Learning...")
    simulators.rlsimulate(game, qlearning, numTrials=learn_trials, verbose=False, maxIterations=maxIters)

    # Evaluate greedily, counting only wins from the test phase.
    qlearning.explorationProb = 0
    game.resetWins()
    print("Learning complete. Now simulating tests...")
    simulators.rlsimulate(game, qlearning, numTrials=test_trials, verbose=verbose, maxIterations=maxIters)
    print("Wins observed: %s" % (game.wins,))

    # With no recorded wins (e.g. test_trials == 0) the rate is undefined;
    # report nan instead of raising ZeroDivisionError and losing the learner.
    total_wins = sum(game.wins)
    if total_wins:
        win_rate = float(game.wins[agent]) / total_wins
    else:
        win_rate = float("nan")
    print("Agent in position %s has a win rate of %s \n" % (agent, win_rate))
    return qlearning
Esempio n. 2
0
def qlearn_test(nplayers, num_card_values, num_cards, agent, trials, qlearning, dishonesty_list=None, confidence_list=None, learn_list=None, verbose=False, maxIters=None):
    """Evaluate an existing Q-learning object greedily on a fresh BS game.

    A new ``play_game.BSGame`` is created, every seat is given the
    ``decision`` callable of a ``policy.DishonestPolicy``, the learner's
    ``explorationProb`` is set to 0, and ``simulators.rlsimulate`` is run for
    ``trials`` trials. The observed wins and the agent's win rate are printed.

    Args:
        nplayers: Number of players; also the length of the default
            per-player lists.
        num_card_values: Length of the list passed to ``BSGame``.
        num_cards: Value repeated ``num_card_values`` times in that list
            (presumably the number of cards per value -- confirm against
            ``BSGame``).
        agent: Passed to ``BSGame`` and used as the index into ``game.wins``
            when reporting the win rate.
        trials: ``numTrials`` for the simulation.
        qlearning: Learner object to evaluate. Note that its
            ``explorationProb`` attribute is overwritten with 0 and not
            restored.
        dishonesty_list: Per-player first argument for ``DishonestPolicy``;
            a falsy value means ``[0.1] * nplayers``.
        confidence_list: Per-player ``confidence``; a falsy value means
            ``[1] * nplayers``.
        learn_list: Per-player ``learn`` flag; a falsy value means
            ``[False] * nplayers``.
        verbose: Forwarded to ``simulators.rlsimulate``.
        maxIters: Optional cap forwarded as ``maxIterations``. When ``None``
            (the default) the argument is omitted so the simulator's own
            default applies, exactly as before this parameter existed.
    """
    # Single pre-formatted strings keep the output identical to the original
    # Python 2 print statements while remaining valid Python 3 syntax.
    print("Testing qlearning as agent %s ." % (agent,))
    game = play_game.BSGame(nplayers, [num_cards] * num_card_values, agent, verbose=0)

    if not dishonesty_list:
        dishonesty_list = [0.1] * nplayers
    if not confidence_list:
        confidence_list = [1] * nplayers
    if not learn_list:
        learn_list = [False] * nplayers
    print("Players have dishonesty %s" % (dishonesty_list,))

    game.setPolicies([
        policy.DishonestPolicy(
            game,
            dishonesty_list[seat],
            confidence=confidence_list[seat],
            learn=learn_list[seat],
        ).decision
        for seat in range(nplayers)
    ])

    # Act greedily during evaluation.
    qlearning.explorationProb = 0
    print("Simulating...")
    sim_kwargs = {"numTrials": trials, "verbose": verbose}
    if maxIters is not None:
        sim_kwargs["maxIterations"] = maxIters
    simulators.rlsimulate(game, qlearning, **sim_kwargs)
    print("Wins observed: %s" % (game.wins,))

    # With no recorded wins (e.g. trials == 0) the rate is undefined;
    # report nan instead of raising ZeroDivisionError.
    total_wins = sum(game.wins)
    if total_wins:
        win_rate = float(game.wins[agent]) / total_wins
    else:
        win_rate = float("nan")
    print("Agent in position %s has a win rate of %s \n" % (agent, win_rate))