def trainQValueNetwork(loopNum=10000, startTurn=0, history_filename='train_winners_nn', inputNum=192, type=1):
    '''
    Train a Q-value network by letting three agents play the game against
    each other.

    Each agent's network is persisted under its player name together with the
    number of training episodes completed so far; win/loss counts are
    persisted (pickled) to ``history_filename``.

    :param loopNum: number of training episodes to run in this call
    :param startTurn: episode index to resume from (overridden by the
        history file's totals when that file exists)
    :param history_filename: path of the pickled ``{winner: count}`` history
    :param inputNum: input-layer size (also used for the hidden layer)
    :param type: experiment type forwarded to ``Experiment`` (name kept for
        backward compatibility even though it shadows the builtin)
    '''
    agents = []
    winners = {}
    # Resume from a previous run: the total number of recorded wins equals
    # the number of episodes already played.
    # NOTE: pickle data must be read in binary mode ('rb'); text mode breaks
    # on Windows and on Python 3.
    if os.path.isfile(history_filename):
        with open(history_filename, 'rb') as f:
            winners = pickle.load(f)
            startTurn = sum(winners.values())
    # Build the three agents, each restoring its network checkpoint.
    for i in range(0, 3):
        playerName = PLAYER_LIST[i]
        nw = RunFastNetwork(playerName, inputNum=inputNum, hiddenNum=inputNum, outNum=1)
        nw.loadNet(playerName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    for i in range(startTurn, startTurn + loopNum):
        # Periodic checkpoint: save every agent's network and the win history.
        if i % 200 == 0:
            for agent in agents:
                agent.saveNet()
            with open(history_filename, 'wb') as f:
                pickle.dump(winners, f)
        winner = exp.doEpisode()
        # dict.has_key() is gone in Python 3; `in` works everywhere.
        if winner in winners:
            winners[winner] += 1
        else:
            winners[winner] = 1
    # Final checkpoint (the original dumped the history twice here; once is
    # sufficient).
    for agent in agents:
        agent.saveNet()
    with open(history_filename, 'wb') as f:
        pickle.dump(winners, f)
    print(winners)
def trainDeepNetwork(loopNum=10000, startTurn=0, history_filename='train_winners_dn', type=1, inputNum=192):
    '''
    Train Q-values with a deep (three-hidden-layer) network by letting three
    agents play the game against each other.

    Each agent's network is persisted under its player name together with the
    number of training episodes completed so far; win/loss counts are
    persisted (pickled) to ``history_filename``.

    :param loopNum: number of training episodes to run in this call
    :param startTurn: episode index to resume from (overridden by the
        history file's totals when that file exists)
    :param history_filename: path of the pickled ``{winner: count}`` history
    :param type: experiment type forwarded to ``Experiment`` (name kept for
        backward compatibility even though it shadows the builtin)
    :param inputNum: input-layer size (also used for all hidden layers)
    '''
    agents = []
    winners = {}
    # Resume from a previous run: the total number of recorded wins equals
    # the number of episodes already played.
    # NOTE: pickle data must be read in binary mode ('rb'); text mode breaks
    # on Windows and on Python 3.
    if os.path.isfile(history_filename):
        with open(history_filename, 'rb') as f:
            winners = pickle.load(f)
            startTurn = sum(winners.values())
    # Build the three agents, each restoring its deep-network checkpoint.
    for i in range(0, 3):
        playerName = PLAYER_LIST[i]
        nw = RunFastDeepNetwork(playerName, inputNum=inputNum, hidden1Num=inputNum, hidden2Num=inputNum, hidden3Num=inputNum, outNum=1)
        nw.loadNet(playerName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    for i in range(startTurn, startTurn + loopNum):
        # Periodic checkpoint: save every agent's network and the win history.
        if i % 200 == 0:
            for agent in agents:
                agent.saveNet()
            with open(history_filename, 'wb') as f:
                pickle.dump(winners, f)
        winner = exp.doEpisode()
        # dict.has_key() is gone in Python 3; `in` works everywhere.
        if winner in winners:
            winners[winner] += 1
        else:
            winners[winner] = 1
    # Final checkpoint (the original dumped the history twice here; once is
    # sufficient).
    for agent in agents:
        agent.saveNet()
    with open(history_filename, 'wb') as f:
        pickle.dump(winners, f)
    print(winners)