def _saveTrainingProgress(agents, winners, history_filename):
    '''
    Save every agent's network and write the winner tally to history_filename.
    '''
    for agent in agents:
        agent.saveNet()
    # Pickle streams are byte streams: binary mode is required on Python 3
    # and for any protocol other than the ASCII protocol 0.
    with open(history_filename, 'wb') as f:
        pickle.dump(winners, f)


def trainDeepNetworkWithMemory(loopNum=30000, startTurn=0, history_filename='train_winners_dn_with_memory_1000', inputNum=192, type=1, saveInterval=200, capacity=1000):
    '''
    Train the deep neural networks using the "with memory" episode mode.

    Builds one RunFastDeepNetwork / RunFastAgentWithMemory pair for each of
    the first three entries of PLAYER_LIST, then plays loopNum episodes via
    Experiment.doEpisodeWithMemory, counting how often each winner is
    returned.

    Parameters:
        loopNum          -- number of episodes to play in this call.
        startTurn        -- turn index to resume from. Ignored when
                            history_filename exists: it is then replaced by
                            the total number of wins recorded in that file.
        history_filename -- pickle file holding the {winner: win count} dict.
        inputNum         -- passed to RunFastDeepNetwork as the input size
                            and as the size of all three hidden layers.
        type             -- forwarded unchanged to Experiment(type=...).
        saveInterval     -- networks and tally are saved whenever the turn
                            index is a multiple of this value; a falsy value
                            disables the periodic saves.
        capacity         -- forwarded to doEpisodeWithMemory(capacity=...)
                            (presumably the memory size -- confirm against
                            Experiment).

    Returns None. The networks and the tally are saved once more after the
    last episode.
    '''
    agents = []
    winners = {}

    # Resume from a previous run: the number of finished episodes equals the
    # sum of all recorded wins.
    # NOTE(review): the file is now read in binary mode; a history file that
    # was written in text mode on Windows may need to be regenerated.
    if os.path.isfile(history_filename):
        with open(history_filename, 'rb') as f:
            winners = pickle.load(f)
        startTurn = sum(winners.values())

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(startTurn)

    # Build one network-backed agent per player.
    for i in range(3):
        playerName = PLAYER_LIST[i]
        nw = RunFastDeepNetwork(playerName, inputNum=inputNum, hidden1Num=inputNum,
                                hidden2Num=inputNum, hidden3Num=inputNum, outNum=1)
        nw.loadNet(playerName, startTurn)
        agents.append(RunFastAgentWithMemory(playerName, nw))

    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)

    for i in range(startTurn, startTurn + loopNum):
        # Periodic checkpoint, taken before the episode of turn i is played.
        if saveInterval and i % saveInterval == 0:
            _saveTrainingProgress(agents, winners, history_filename)

        winner = exp.doEpisodeWithMemory(capacity=capacity)
        winners[winner] = winners.get(winner, 0) + 1

    # Final checkpoint. The original dumped the tally twice in a row with
    # identical content; once is enough.
    _saveTrainingProgress(agents, winners, history_filename)
    print(winners)