def play(list_of_players, rng, v=False):
    """Run repeated tournaments and save the averaged pairwise score matrix.

    Five tournaments are played on a fresh ``Games.PrisonersDilemma('Joshua')``
    game. After each one, row ``k`` of every player's ``lastScore`` is summed,
    divided by 200 and accumulated into an ``L x L`` matrix (``L`` being the
    number of players). The matrix is finally divided by the number of
    tournaments and written to
    ``data/simulations/Matrix_avgPlayers_rng.csv`` with one column per player
    name.

    Args:
        list_of_players: Player objects exposing ``name``, ``lastScore`` and
            either ``prepThread`` (names starting with ``'Neural'``) or
            ``clearHistory`` (all others). ``resetState`` is called when
            available.
        rng: Forwarded unchanged as the third argument of
            ``game.tournament``.
            NOTE(review): presumably a noise level -- confirm against
            ``Games.PrisonersDilemma.tournament``.
        v: When true, print the tournament index before each tournament.
    """
    n_repeats = 5
    # Second argument of game.tournament and the score divisor.
    # NOTE(review): presumably the number of rounds per match -- confirm.
    n_rounds = 200

    out_dir = os.path.join('data', 'simulations')
    game = Games.PrisonersDilemma('Joshua')
    n_players = len(list_of_players)
    score_matrix = np.zeros([n_players, n_players])

    for repeat in range(n_repeats):
        if v:
            print(repeat)

        # Prepare every player for a new tournament.
        for player in list_of_players:
            if player.name.startswith('Neural'):
                player.prepThread(n_players, 16)
            else:
                player.clearHistory(n_players)

        game.tournament(list_of_players, n_rounds, rng, True)

        for row, player in enumerate(list_of_players):
            for col in range(n_players):
                score_matrix[row, col] += (
                    np.sum(player.lastScore[col, :]) / n_rounds)
            # Best-effort reset: not every strategy provides resetState().
            # Catch Exception rather than everything so Ctrl-C and
            # SystemExit still stop the run.
            try:
                player.resetState()
            except Exception:
                pass

    # Later players with a duplicate name overwrite earlier columns, exactly
    # as a plain dict assignment would.
    columns = {
        player.name: score_matrix[row, :] / n_repeats
        for row, player in enumerate(list_of_players)
    }
    frame = pd.DataFrame(columns)

    # Make sure the target directory exists so the results are not lost
    # after the simulations have already been run.
    os.makedirs(out_dir, exist_ok=True)
    frame.to_csv(os.path.join(out_dir, 'Matrix_avgPlayers_rng.csv'))
def stability(nReplicates, epochs, gamma, v=False):
    """Train a fresh neural agent per replicate and log per-epoch statistics.

    For each replicate a new ``strat.Neural203Agent`` (with its ``gamma``
    attribute set to ``gamma``) plays against a new ``strat.TitF2tatAgent``.
    Every epoch runs one ``game.tournament(players, 200, 0.05, False)``,
    records statistics of the neural agent (player index 0) and then calls
    ``train()`` on it.

    The results are written as CSV to
    ``data/simulations/Stability_data_gamma<gamma>`` (the file name has no
    extension) with the columns ``avgScore``, ``avgOp``, ``avgMe``,
    ``avgSOp`` and ``avgSMe``; each has ``epochs * nReplicates`` rows,
    replicate after replicate.

    Args:
        nReplicates: Number of independent agent pairs to train.
        epochs: Number of tournament/train cycles per replicate.
        gamma: Value assigned to the neural agent's ``gamma`` attribute and
            embedded in the output file name.
        v: When true, print the process id and replicate number at the start
            of each replicate.
    """
    # Second argument of game.tournament and divisor of every statistic.
    # NOTE(review): presumably the number of rounds per match -- confirm.
    n_rounds = 200
    # Constant offset added to the history size; always zero in this code.
    history_offset = 0

    game = Games.PrisonersDilemma('Joshua')
    out_dir = os.path.join('data', 'simulations')

    total = epochs * nReplicates
    avg_score = np.zeros(total)
    avg_op = np.zeros(total)
    avg_me = np.zeros(total)
    avg_switches_op = np.zeros(total)
    avg_switches_me = np.zeros(total)

    for replicate in range(nReplicates):
        neural = strat.Neural203Agent(name='Neural',
                                      actionSpace=game.actionSpace)
        neural.gamma = gamma
        opponent = strat.TitF2tatAgent(name='TitF2Tat')
        players = [neural, opponent]
        history_size = len(players) - 1 + history_offset

        if v:
            print('process id:', os.getpid(), '\tReplicat nr:', replicate)

        first_slot = epochs * replicate
        for slot in range(first_slot, first_slot + epochs):
            for player in players:
                if player.name == 'Neural':
                    player.prepThread(history_size, 10)
                else:
                    player.clearHistory(history_size)

            game.tournament(players, n_rounds, 0.05, False)

            # All statistics are taken from the neural agent (index 0).
            tracked = players[0]
            avg_score[slot] = np.sum(tracked.lastScore) / n_rounds
            avg_op[slot] = np.sum(tracked.lastOp) / n_rounds
            avg_me[slot] = np.sum(tracked.lastMe) / n_rounds
            # countSwitches is defined elsewhere in the project.
            # NOTE(review): presumably counts action changes in a sequence.
            avg_switches_op[slot] = countSwitches(tracked.lastOp[0]) / n_rounds
            avg_switches_me[slot] = countSwitches(tracked.lastMe[0]) / n_rounds

            # Separate loop variable: the original reused the epoch index
            # name here, which shadowed it.
            for player in players:
                if player.name == 'Neural':
                    player.train()

    frame = pd.DataFrame({
        'avgScore': avg_score,
        'avgOp': avg_op,
        'avgMe': avg_me,
        'avgSOp': avg_switches_op,
        'avgSMe': avg_switches_me,
    })

    # Make sure the target directory exists so the results are not lost
    # after the whole experiment has already been run.
    os.makedirs(out_dir, exist_ok=True)
    frame.to_csv(os.path.join(out_dir, 'Stability_data_gamma{}'.format(gamma)))
def train(list_of_players, rng=0.0, v=False):
    """Train the neural players over 1000 tournaments and log average scores.

    Each iteration prepares all players, runs
    ``game.tournament(list_of_players, 200, noise)`` and computes every
    player's average score as ``sum(lastScore) / (200 * (n_players - 1))``.
    Players whose name starts with ``'Neural'`` are then trained, and
    ``resetState`` is called on every player when available. Whenever the
    first player's average exceeds the best value seen so far (starting from
    0), its model is saved via ``saveModel(path='model')``.

    ``noise`` starts at 0.2, is multiplied by 0.9 on every 10th iteration
    (including iteration 0) and never drops below 0.05.

    The score history is written to ``data/training/<first player name>.csv``
    with one column per player name and one row per iteration.

    Args:
        list_of_players: Player objects; index 0 is the one whose model is
            checkpointed and whose name is used for the output file.
        rng: Currently unused; the tournament receives the internally
            scheduled ``noise`` instead. Kept for interface compatibility.
        v: When true, print the iteration index at the start of each
            iteration.
    """
    n_iterations = 1000
    # Second argument of game.tournament and part of the score divisor.
    # NOTE(review): presumably the number of rounds per match -- confirm.
    n_rounds = 200

    out_dir = os.path.join('data', 'training')
    model_dir = 'model'
    game = Games.PrisonersDilemma('Joshua')

    n_opponents = len(list_of_players) - 1
    score_divisor = n_rounds * n_opponents

    score_history = []
    noise = 0.2
    best_score = 0

    for iteration in range(n_iterations):
        if v:
            print(iteration)

        # Decay the tournament noise every 10 iterations, floored at 0.05.
        if iteration % 10 == 0:
            noise *= 0.9
            noise = np.max([noise, 0.05])

        for player in list_of_players:
            if player.name.startswith('Neural'):
                player.prepThread(n_opponents, 16)
            else:
                player.clearHistory(n_opponents)

        game.tournament(list_of_players, n_rounds, noise)

        scores = []
        for player in list_of_players:
            scores.append(np.sum(player.lastScore) / score_divisor)
            if player.name.startswith('Neural'):
                player.train()
            # Best-effort reset: not every strategy provides resetState().
            # Catch Exception rather than everything so Ctrl-C and
            # SystemExit can still interrupt the long training loop.
            try:
                player.resetState()
            except Exception:
                pass
        score_history.append(scores)

        # Checkpoint the first player whenever it sets a new best average.
        if scores[0] > best_score:
            list_of_players[0].saveModel(path=model_dir)
            best_score = scores[0]

    history = np.array(score_history)
    frame = pd.DataFrame({
        list_of_players[col].name: history[:, col]
        for col in range(history.shape[1])
    })

    # Make sure the target directory exists so the training log is not lost
    # after the whole run has already completed.
    os.makedirs(out_dir, exist_ok=True)
    frame.to_csv(os.path.join(out_dir, list_of_players[0].name + '.csv'))