def testQValueNetwork(startTurn=0, loopNum=1000, type=''):
    """Run `loopNum` test games for the network family selected by `type`.

    Only the player under test (PLAYER_LIST[type][0]) loads its trained net
    saved at training step `startTurn`; the other two agents use fresh,
    untrained networks. Accumulated points and win counts are persisted to
    the pickle file named by TEST[type], keyed by startTurn then player name:
    {startTurn: {playerName: {'point': int, 'win': int}}}

    NOTE: `type` shadows the builtin, but it is a keyword parameter visible
    to callers, so the name is kept for backward compatibility.
    """
    agents = []
    win_nums = {}
    test_name = PLAYER_LIST[type][0]
    test_filename = TEST[type]
    # Resume previously accumulated statistics if a results file exists.
    # Binary mode is required for pickle (text mode corrupts the stream).
    if os.path.isfile(test_filename):
        with open(test_filename, 'rb') as f:
            win_nums = pickle.load(f)
    for i in range(0, 3):
        playerName = PLAYER_LIST[type][i]
        nw = NETWORK[type](playerName)
        if playerName == test_name:
            # Only the tested player receives the trained weights.
            nw.loadNet(playerName, startTurn)
        rfa = AGENT[type](playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents)
    # Hoisted out of the loop: the bucket for this startTurn never changes.
    stats = win_nums.setdefault(startTurn, {})
    for i in range(loopNum):
        testHistory = exp.doTest(test_name)
        for j in range(0, 3):
            playerName = PLAYER_LIST[type][j]
            entry = stats.setdefault(playerName, {'point': 0, 'win': 0})
            entry['point'] += testHistory[playerName]
            # testHistory['name'] identifies the winner of this game.
            if testHistory['name'] == playerName:
                entry['win'] += 1
    with open(test_filename, 'wb') as f:
        pickle.dump(win_nums, f)
def testQValueNetwork(startTurn=0, loopNum=1000, testName='player0', filename='test_winners_nn', playerNamePrefix='player', type=1, inputNum=192):
    """Evaluate a trained single-hidden-layer RunFastNetwork over test games.

    During testing the other two agents do NOT use the best network — only
    the player under test (`testName`) loads its trained net saved at step
    `startTurn`. The tested agent always picks its best action; the other
    two agents pick the best action with 50% probability, using plain,
    untrained nets.

    Results are accumulated into a pickle file `filename` shaped as:
    {startTurn: {playerName: {'point': int, 'win': int}}}

    NOTE: `playerNamePrefix` is accepted for backward compatibility but is
    not used by this implementation.
    """
    agents = []
    winNums = {}
    # Resume previously accumulated statistics; binary mode is required
    # for pickle (text mode corrupts the stream).
    if os.path.isfile(filename):
        with open(filename, 'rb') as f:
            winNums = pickle.load(f)
    print('loading agents')
    for i in range(0, 3):
        playerName = PLAYER_LIST[i]
        nw = RunFastNetwork(playerName, inputNum=inputNum, hiddenNum=inputNum, outNum=1)
        if playerName == testName:
            # Only the tested player receives the trained weights.
            nw.loadNet(testName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    print('set up the experiment')
    # Hoisted out of the loop: the bucket for this startTurn never changes.
    stats = winNums.setdefault(startTurn, {})
    for i in range(startTurn, startTurn + loopNum):
        testHistory = exp.doTest(testName)
        for j in range(0, 3):
            playerName = PLAYER_LIST[j]
            entry = stats.setdefault(playerName, {'point': 0, 'win': 0})
            entry['point'] += testHistory[playerName]
            # testHistory['name'] identifies the winner of this game.
            if testHistory['name'] == playerName:
                entry['win'] += 1
        # Progress report: games completed so far and running totals.
        print(str(i - startTurn), winNums)
    print(winNums)
    with open(filename, 'wb') as f:
        pickle.dump(winNums, f)
def testQValueNetwork(startTurn=0, loopNum=1000, testName='player0', filename='win_nums', type=1, inputNum=192):
    """Evaluate a trained RunFastDeepNetwork (three hidden layers) over test games.

    Only the player under test (`testName`) loads its trained deep net
    saved at step `startTurn`; the other two agents use fresh, untrained
    nets. Accumulated points and win counts are persisted to the pickle
    file `filename`, shaped as:
    {startTurn: {playerName: {'point': int, 'win': int}}}
    """
    agents = []
    winNums = {}
    # Resume previously accumulated statistics; binary mode is required
    # for pickle (text mode corrupts the stream).
    if os.path.isfile(filename):
        with open(filename, 'rb') as f:
            winNums = pickle.load(f)
    print('loading agents')
    for i in range(0, 3):
        playerName = PLAYER_LIST[i]
        nw = RunFastDeepNetwork(playerName, inputNum=inputNum, hidden1Num=inputNum, hidden2Num=inputNum, hidden3Num=inputNum, outNum=1)
        if playerName == testName:
            # Only the tested player receives the trained weights.
            nw.loadNet(playerName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    print('set up the experiment')
    # Hoisted out of the loop: the bucket for this startTurn never changes.
    stats = winNums.setdefault(startTurn, {})
    for i in range(startTurn, startTurn + loopNum):
        testHistory = exp.doTest(testName)
        for j in range(0, 3):
            playerName = PLAYER_LIST[j]
            entry = stats.setdefault(playerName, {'point': 0, 'win': 0})
            entry['point'] += testHistory[playerName]
            # testHistory['name'] identifies the winner of this game.
            if testHistory['name'] == playerName:
                entry['win'] += 1
        # Progress report: games completed so far and running totals.
        print(str(i - startTurn), winNums)
    print(winNums)
    with open(filename, 'wb') as f:
        pickle.dump(winNums, f)