def ml_vs_titfortat_iterate():
    """Run an iterated match: a MachineLearning player against Titfortat.

    Prints a banner, builds the two players (the learner's Q-learning
    ``gamma`` is set to 0.9), plays 10000 rounds through ``Game`` and
    finally prints the learner's ten most recent actions.
    """
    rounds = 10000
    discount = 0.9  # We deeply care about future rewards

    print("Start an _iterated_ game between MachineLearning and Titfortat")

    learner = MachineLearning("A")
    learner.qlearning.gamma = discount
    opponent = Titfortat("B")

    Game(learner, opponent).play(rounds)

    recent_moves = learner.actions[-10:]
    print("last 10 moves: ", recent_moves)
def ml_vs_ml_iterate():
    """Run an iterated match between two MachineLearning players.

    Both players get their Q-learning ``gamma`` set to 0.9, the match is
    played for 10000 rounds, and the last ten actions of each player are
    printed afterwards.
    """
    rounds = 10000
    discount = 0.9  # We care about future rewards

    print("Start an _iterated_ game between two MachineLearning bots")

    # Each bot is configured right after it is created, so the order of
    # construction and attribute assignment stays A, then B.
    bot_a = MachineLearning("A")
    bot_a.qlearning.gamma = discount
    bot_b = MachineLearning("B")
    bot_b.qlearning.gamma = discount

    match = Game(bot_a, bot_b)
    match.play(rounds)

    tail_a = bot_a.actions[-10:]
    print("last 10 moves of a: ", tail_a)
    tail_b = bot_b.actions[-10:]
    print("last 10 moves of b: ", tail_b)
def deepqlearning_vs_titfortat_iterate():
    """Run an iterated match: a deep Q-learning player against Titfortat.

    The learner wraps a ``DeepQLearnerAdaptater`` built with ``decay=0.9``
    and ``learning_rate=0.02``. After 10000 rounds, a ``Counter`` of each
    player's last 1000 actions is printed.
    """
    rounds = 10000
    window = 1000

    print("Start an iterated game between DeepQLearning and Titfortat")

    # NOTE(review): the positional arguments 2 and 1 are passed through
    # unchanged; their meaning is defined by DeepQLearnerAdaptater.
    deep_agent = DeepQLearnerAdaptater(
        2, 1, decay=0.9, learning_rate=0.02, scope="dvt_i_a"
    )
    learner = MachineLearning("A", agent=deep_agent)
    opponent = Titfortat("B")

    Game(learner, opponent).play(rounds)

    print("Last 1000 moves:")
    for label, player in (("prisoner_a:", learner), ("prisoner_b:", opponent)):
        print(label, Counter(player.actions[-window:]))
def deepqlearning_vs_deepqlearning_non_iterate():
    """Run a non-iterated match between two deep Q-learning players.

    Both players wrap a ``DeepQLearnerAdaptater`` built with ``decay=0.0``
    and ``learning_rate=0.02``, differing only in their ``scope`` name.
    After 10000 rounds, a ``Counter`` of each player's last 1000 actions
    is printed.
    """
    rounds = 10000
    window = 1000
    # Hyper-parameters common to both agents; only the scope differs.
    shared = {"decay": 0.0, "learning_rate": 0.02}

    print("Start an non-iterated game between DeepQLearning and DeepQLearning")

    # Agents and players are created in the order A-agent, A-player,
    # B-agent, B-player.
    first_agent = DeepQLearnerAdaptater(2, 1, **shared, scope="dvd_ni_a")
    first = MachineLearning("A", agent=first_agent)
    second_agent = DeepQLearnerAdaptater(2, 1, **shared, scope="dvd_ni_b")
    second = MachineLearning("B", agent=second_agent)

    match = Game(first, second)
    match.play(rounds)

    print("Last 1000 moves:")
    for label, player in (("prisoner_a:", first), ("prisoner_b:", second)):
        print(label, Counter(player.actions[-window:]))