def evaluate():
    """Pit a random player against an n-tuple player and print the tally.

    Runs 100 rounds. Each round builds a fresh ``randomPlayer`` and a fresh
    ``nTuplesSystematic`` and plays two games of ``othello`` through
    ``game2.play``, swapping which of the two is passed first.  A result of
    1 is counted as a win for the first player handed to ``game2.play`` and
    a result of 2 as a win for the second; any other result is not counted.

    Prints a two-element list: ``[random player wins, n-tuple player wins]``.
    Returns nothing.
    """

    def as_epsilon_player(bot):
        # Fresh wrapper per game, exactly as many wrappers as games * 2.
        return game2.player_epsilon(lambda state: bot.play_next_move(state))

    random_wins = 0
    ntuple_wins = 0

    for _ in range(100):
        random_bot = randomPlayer()
        ntuple_bot = nTuplesSystematic()

        # First game: the random player is passed first.
        outcome = game2.play(
            othello.game(),
            as_epsilon_player(random_bot),
            as_epsilon_player(ntuple_bot),
            False,
        )
        if outcome == 1:
            random_wins += 1
        elif outcome == 2:
            ntuple_wins += 1

        # Second game: seats swapped, so the result codes swap meaning too.
        outcome = game2.play(
            othello.game(),
            as_epsilon_player(ntuple_bot),
            as_epsilon_player(random_bot),
            False,
        )
        if outcome == 2:
            random_wins += 1
        elif outcome == 1:
            ntuple_wins += 1

    print([random_wins, ntuple_wins])
# Evolutionary training script: build an initial population of nn.nn() players,
# then, per generation, score every individual against
# nTuplesSystematicObject and rank the population by win count.
#
# NOTE(review): the original indentation of this section was lost; the nesting
# below is reconstructed from the statement order. In particular, confirm
# whether `playermaxx.reset()` belongs inside the 200-game loop (as written
# here) or after it.

# Placeholder list with one zero per individual.
child = [0]*populationsize

# --- Initial population -----------------------------------------------------
for i in range(populationsize):
    playermaxx = nn.nn()
    # 200 self-play games: the same object is used for both seats.
    # The meaning of the 0.3 argument is not visible here — presumably an
    # exploration rate; TODO confirm against nn.play_move.
    for j in range(200):
        game2.play(othello.game(), game2.player(lambda x: playermaxx.play_move(x,0.3)),game2.player(lambda x: playermaxx.play_move(x,0.3)), False)
        playermaxx.reset()
    parent.append(playermaxx)

# --- Generations --------------------------------------------------------------
for z in range(generations):
    # win[i] will hold the number of games parent[i] won this generation.
    win = []
    for i in range(populationsize):
        winsfori=0
        # 100 rounds of two games each, alternating who is passed first.
        for j in range(100):
            # parent[i] is passed first: a result of 1 is counted as its win.
            winner = game2.play(othello.game(), game2.player_epsilon(lambda x: parent[i].play_move(x)),game2.player_epsilon(lambda x: nTuplesSystematicObject.play_next_move(x)), False)
            if winner == 1:
                winsfori += 1
            # parent[i] is passed second: a result of 2 is counted as its win.
            winner = game2.play(othello.game(),game2.player_epsilon(lambda x: nTuplesSystematicObject.play_next_move(x)), game2.player_epsilon(lambda x: parent[i].play_move(x)), False)
            if winner == 2:
                winsfori += 1
        win.append(winsfori)
    # Indices into `win` (and `parent`), ordered from most wins to fewest.
    sortedwin = sorted(range(len(win)), key=lambda k: -1*win[k])
    goodparents = []
    # Walk the top `goodpopulationsize` individuals by win count.
    for i in range(goodpopulationsize):
        # 'w' entries of the first two layers of the selected parent
        # (presumably the weight arrays — verify against nn).
        a1 = parent[sortedwin[i]].layers[0].np['w']
        a2 = parent[sortedwin[i]].layers[1].np['w']
        a3 = []