예제 #1
0
파일: main.py 프로젝트: zydmayday/runfast
def testQValueNetwork(startTurn=0, loopNum=1000, type=''):
	"""Run loopNum test games and accumulate per-player points and wins.

	The player under test (PLAYER_LIST[type][0]) loads its trained network
	for checkpoint startTurn; the other two agents keep a fresh network.
	Results are merged into the pickle file TEST[type], keyed by startTurn:
	{startTurn: {playerName: {'point': total_points, 'win': win_count}}}.

	NOTE(review): 'type' shadows the builtin but is kept for caller
	compatibility; it indexes PLAYER_LIST / TEST / NETWORK / AGENT.
	"""
	agents = []
	win_nums = {}
	test_name = PLAYER_LIST[type][0]
	test_filename = TEST[type]
	# Binary mode is required for pickle data (mandatory on Python 3,
	# safe on Python 2); text mode can corrupt higher pickle protocols.
	if os.path.isfile(test_filename):
		with open(test_filename, 'rb') as f:
			win_nums = pickle.load(f)

	for i in range(0, 3):
		playerName = PLAYER_LIST[type][i]
		nw = NETWORK[type](playerName)
		if playerName == test_name:
			# Only the player under test loads the trained weights.
			nw.loadNet(playerName, startTurn)
		rfa = AGENT[type](playerName, nw)
		agents.append(rfa)

	env = RunFastEnvironment()
	exp = Experiment(env, agents)

	# startTurn is loop-invariant: create its result bucket once, not
	# once per game as the original did.
	turn_stats = win_nums.setdefault(startTurn, {})
	for i in range(loopNum):
		testHistory = exp.doTest(test_name)
		for j in range(0, 3):
			playerName = PLAYER_LIST[type][j]
			stats = turn_stats.setdefault(playerName, {'point': 0, 'win': 0})
			stats['point'] += testHistory[playerName]
			# testHistory['name'] is the winner of this game.
			if testHistory['name'] == playerName:
				stats['win'] += 1
	with open(test_filename, 'wb') as f:
		pickle.dump(win_nums, f)
예제 #2
0
def testQValueNetwork(startTurn=0, loopNum=1000, testName='player0', filename='test_winners_nn', playerNamePrefix='player', type=1, inputNum=192):
	'''
	在测试时,其他的两个agent都不选用最佳的network,只有测试对象使用
	然后测试对象每次选取最佳的行动,其他的两个agent有50%概率选择最佳行动,不过他们的net应该是最普通的,没有经过训练的
	winNums = {trainNum: {player0: {'point': xxx, 'win': yyy}, player1: {...}}}
	'''
	agents = []
	winNums = {}
	if os.path.isfile(filename):
		with open(filename, 'r') as f:
			winNums = pickle.load(f)

	print 'loading agents'
	for i in range(0, 3):
		playerName = PLAYER_LIST[i]
		nw = RunFastNetwork(playerName, inputNum=inputNum, hiddenNum=inputNum, outNum=1)
		if playerName == testName:
			nw.loadNet(testName, startTurn)
		rfa = RunFastAgent(playerName, nw)
		agents.append(rfa)
		 
	env = RunFastEnvironment()
	exp = Experiment(env, agents, type=type)

	print 'set up the experiment'

	for i in range(startTurn, startTurn + loopNum):
		if not winNums.get(startTurn):
			winNums[startTurn] = {}
		testHistory = exp.doTest(testName)
		for j in range(0,3):
			playerName = PLAYER_LIST[j]
			if not winNums[startTurn].get(playerName):
				winNums[startTurn][playerName] = {'point': 0, 'win': 0}
			winNums[startTurn][playerName]['point'] += testHistory[playerName]
			if testHistory['name'] == playerName:
				winNums[startTurn][playerName]['win'] += 1
		print str(i-startTurn), winNums

	print winNums
	with open(filename, 'w') as f:
		pickle.dump(winNums, f)
예제 #3
0
def testQValueNetwork(startTurn=0, loopNum=1000, testName='player0', filename='win_nums', type=1, inputNum=192):
	agents = []
	winNums = {}
	if os.path.isfile(filename):
		with open(filename, 'r') as f:
			winNums = pickle.load(f)

	print 'loading agents'
	for i in range(0, 3):
		playerName = PLAYER_LIST[i]
		nw = RunFastDeepNetwork(playerName, inputNum=inputNum, hidden1Num=inputNum, hidden2Num=inputNum, hidden3Num=inputNum, outNum=1)
		if playerName == testName:
			nw.loadNet(playerName, startTurn)
		rfa = RunFastAgent(playerName, nw)
		agents.append(rfa)
		 
	env = RunFastEnvironment()
	exp = Experiment(env, agents, type=type)

	print 'set up the experiment'

	for i in range(startTurn, startTurn + loopNum):
		if not winNums.get(startTurn):
			winNums[startTurn] = {}
		testHistory = exp.doTest(testName)
		for j in range(0,3):
			playerName = PLAYER_LIST[j]
			if not winNums[startTurn].get(playerName):
				winNums[startTurn][playerName] = {'point': 0, 'win': 0}
			winNums[startTurn][playerName]['point'] += testHistory[playerName]
			if testHistory['name'] == playerName:
				winNums[startTurn][playerName]['win'] += 1
		print str(i-startTurn), winNums

	print winNums
	with open(filename, 'w') as f:
		pickle.dump(winNums, f)