def qlearn_learn(nplayers, num_card_values, num_cards, agent, learn_trials,
                 test_trials, featureExtractor=qlearn.bsFeatureExtractor,
                 explorationProb=0.2, maxIters=1000, dishonesty_list=None,
                 confidence_list=None, learn_list=None, verbose=False):
    """Train a Q-learning agent on a BS game, then evaluate it without exploration.

    A ``play_game.BSGame`` is built, every seat is given a
    ``policy.DishonestPolicy`` decision function, and a
    ``qlearn.QLearningAlgorithm`` is run through ``simulators.rlsimulate``
    twice: first for ``learn_trials`` games with exploration enabled, then,
    after the win counters are reset and exploration is switched off, for
    ``test_trials`` games. The observed wins and the agent's win rate are
    printed.

    Args:
        nplayers: Number of players; also the length of the default
            per-player lists below.
        num_card_values: Length of the list handed to ``BSGame``.
        num_cards: Value repeated ``num_card_values`` times in that list
            (presumably the number of copies of each card value -- confirm
            against ``BSGame``).
        agent: Passed to ``BSGame`` and used to index ``game.wins``
            (presumably the seat of the learning agent).
        learn_trials: ``numTrials`` for the learning phase.
        test_trials: ``numTrials`` for the evaluation phase.
        featureExtractor: Feature extractor given to ``QLearningAlgorithm``.
        explorationProb: Exploration probability used while learning.
        maxIters: Forwarded as ``maxIterations`` to both simulations.
        dishonesty_list: Per-player dishonesty for ``DishonestPolicy``;
            defaults to ``0.1`` for every player when missing or empty.
        confidence_list: Per-player ``confidence``; defaults to ``1``.
        learn_list: Per-player ``learn`` flag; defaults to ``False``.
        verbose: Forwarded to the evaluation simulation only; the learning
            simulation always runs with ``verbose=False``.

    Returns:
        The ``QLearningAlgorithm`` instance, with ``explorationProb`` left
        at 0.
    """
    # Every print below emits one pre-formatted string, so the output is
    # byte-identical whether ``print`` is the Python 2 statement or the
    # Python 3 function.
    print("Running qlearning as agent %s ." % (agent,))
    game = play_game.BSGame(
        nplayers, [num_cards for _ in range(num_card_values)], agent, verbose=0)

    # Falsy (None or empty) per-player lists fall back to uniform defaults.
    if not dishonesty_list:
        dishonesty_list = [0.1] * nplayers
    if not confidence_list:
        confidence_list = [1] * nplayers
    if not learn_list:
        learn_list = [False] * nplayers
    print("Players have dishonesty %s" % (dishonesty_list,))

    # One policy per seat; only its bound ``decision`` callable is kept.
    game.setPolicies([
        policy.DishonestPolicy(
            game,
            dishonesty_list[seat],
            confidence=confidence_list[seat],
            learn=learn_list[seat],
        ).decision
        for seat in range(nplayers)
    ])

    qlearning = qlearn.QLearningAlgorithm(
        game.actions, game.discount(), featureExtractor,
        explorationProb=explorationProb)

    print("Learning...")
    simulators.rlsimulate(game, qlearning, numTrials=learn_trials,
                          verbose=False, maxIterations=maxIters)

    # Evaluate without exploration and without the wins accumulated while
    # learning.
    qlearning.explorationProb = 0
    game.resetWins()
    print("Learning complete. Now simulating tests...")
    simulators.rlsimulate(game, qlearning, numTrials=test_trials,
                          verbose=verbose, maxIterations=maxIters)

    print("Wins observed: %s" % (game.wins,))
    # With no recorded wins (e.g. test_trials == 0) the ratio is undefined;
    # report 0.0 instead of raising ZeroDivisionError and losing the
    # trained model before it can be returned.
    total_wins = sum(game.wins)
    win_rate = float(game.wins[agent]) / total_wins if total_wins else 0.0
    print("Agent in position %s has a win rate of %s \n" % (agent, win_rate))
    return qlearning
def qlearn_test(nplayers, num_card_values, num_cards, agent, trials, qlearning,
                dishonesty_list=None, confidence_list=None, learn_list=None,
                verbose=False):
    """Evaluate an existing Q-learning agent on a freshly built BS game.

    A new ``play_game.BSGame`` is created, every seat is given a
    ``policy.DishonestPolicy`` decision function, exploration is switched
    off on ``qlearning`` and ``trials`` games are simulated through
    ``simulators.rlsimulate``. The observed wins and the agent's win rate
    are printed.

    Note that ``qlearning.explorationProb`` is set to 0 and not restored,
    and that no ``maxIterations`` is passed, so the simulator's own default
    applies.

    Args:
        nplayers: Number of players; also the length of the default
            per-player lists below.
        num_card_values: Length of the list handed to ``BSGame``.
        num_cards: Value repeated ``num_card_values`` times in that list
            (presumably the number of copies of each card value -- confirm
            against ``BSGame``).
        agent: Passed to ``BSGame`` and used to index ``game.wins``
            (presumably the seat of the learning agent).
        trials: ``numTrials`` for the simulation.
        qlearning: The learning algorithm object to evaluate; mutated as
            described above.
        dishonesty_list: Per-player dishonesty for ``DishonestPolicy``;
            defaults to ``0.1`` for every player when missing or empty.
        confidence_list: Per-player ``confidence``; defaults to ``1``.
        learn_list: Per-player ``learn`` flag; defaults to ``False``.
        verbose: Forwarded to ``simulators.rlsimulate``.

    Returns:
        None.
    """
    # Every print below emits one pre-formatted string, so the output is
    # byte-identical whether ``print`` is the Python 2 statement or the
    # Python 3 function.
    print("Testing qlearning as agent %s ." % (agent,))
    game = play_game.BSGame(
        nplayers, [num_cards for _ in range(num_card_values)], agent, verbose=0)

    # Falsy (None or empty) per-player lists fall back to uniform defaults.
    if not dishonesty_list:
        dishonesty_list = [0.1] * nplayers
    if not confidence_list:
        confidence_list = [1] * nplayers
    if not learn_list:
        learn_list = [False] * nplayers
    print("Players have dishonesty %s" % (dishonesty_list,))

    # One policy per seat; only its bound ``decision`` callable is kept.
    game.setPolicies([
        policy.DishonestPolicy(
            game,
            dishonesty_list[seat],
            confidence=confidence_list[seat],
            learn=learn_list[seat],
        ).decision
        for seat in range(nplayers)
    ])

    # Evaluate without exploration.
    qlearning.explorationProb = 0
    print("Simulating...")
    simulators.rlsimulate(game, qlearning, numTrials=trials, verbose=verbose)

    print("Wins observed: %s" % (game.wins,))
    # With no recorded wins (e.g. trials == 0) the ratio is undefined;
    # report 0.0 instead of raising ZeroDivisionError.
    total_wins = sum(game.wins)
    win_rate = float(game.wins[agent]) / total_wins if total_wins else 0.0
    print("Agent in position %s has a win rate of %s \n" % (agent, win_rate))