def queryPlayerList(self, runId):
    """
    Request a player list from the command server.

    If self.forPolicy is None the "/api/league/players/" endpoint is queried for
    the given runId. Otherwise the "/api/netplayers/" endpoint is queried for
    self.forPolicy and runId is not used.

    Returns whatever requestJson produces for the chosen url.
    """
    if self.forPolicy is not None:
        url = self.command + "/api/netplayers/" + self.forPolicy
    else:
        url = self.command + "/api/league/players/" + runId
    return requestJson(url, self.secret)
def run(self):
    """
    Keep the local copy of the state packages of a run in sync with the server.

    While self.running is truthy, the package listing of self.runId is fetched
    and every package whose id is not yet a key of self.downloadedStatesObject
    is downloaded into self.storageDirectory, newest ("creation") first.
    After each download the bookkeeping (downloadedStatesObject,
    downloadedStatesHistory, history, numStatesAvailable) is updated and
    self.store() is called, so progress made before a failure is kept.

    Any exception is logged and followed by an extra 10 second pause; every
    pass ends with a 5 second pause.
    """
    logMsg("Starting states downloader, storing files in", self.storageDirectory)
    while self.running:
        try:
            # the server side description of all state packages of this run
            remoteEntries = requestJson(self.commandHost + "/api/state/list/" + self.runId, self.secret)

            # sumNewStates is only consumed by the disabled log message below
            sumNewStates = 0
            newEntries = []
            for remoteEntry in remoteEntries:
                if remoteEntry["id"] in self.downloadedStatesObject:
                    continue
                newEntries.append(remoteEntry)
                sumNewStates += remoteEntry["packageSize"]

            # the newest packages are the most interesting ones, so they go first
            newEntries.sort(key=lambda entry: entry["creation"], reverse=True)

            #logMsg("Found %i new state packages with %i states on the server!" % (len(newEntries), sumNewStates))
            for newEntry in newEntries:
                entryId = newEntry["id"]
                statesData = requestBytes(self.commandHost + "/api/state/download/" + entryId, self.secret)
                storeFileUnderPath(os.path.join(self.storageDirectory, entryId), statesData)

                self.downloadedStatesObject[entryId] = newEntry
                self.downloadedStatesHistory.append(newEntry)
                self.downloadedStatesHistory.sort(key=lambda entry: entry["creation"], reverse=True)
                # readers get their own snapshot of the sorted history
                self.history = self.downloadedStatesHistory.copy()
                self.numStatesAvailable += newEntry["packageSize"]
                self.store()
        except Exception as error:
            logMsg("Could not download states, will try again soon", error)
            time.sleep(10)

        time.sleep(5)
def getRunConfig(runId, commandHost, secret):
    """
    Download the configuration of a run into a temporary ".yaml" file.

    The request is repeated every 15 seconds until it succeeds. The return
    value is the open NamedTemporaryFile object itself (its path is available
    as .name), so it is meant to be used as
    "with getRunConfig(...) as configFile:".
    """
    while True:
        try:
            runConfig = requestJson(commandHost + "/api/runs/" + runId, secret)["config"]
        except Exception as error:
            logMsg("Could not get run configuration for run, will try again soon", error)
            time.sleep(15)
        else:
            break

    configFile = tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+")
    configFile.write(runConfig)
    # make sure the content is on disk before somebody opens the file by name
    configFile.flush()

    logMsg("Using tempfile for configuration:", configFile.name)
    return configFile
def loopNetworksDownload(storage):
    """
    Mirror the networks of a run into the directory `storage`, forever.

    --secret, --run and --command are read from sys.argv. Every two seconds:
      1. the list of networks of the run is requested,
      2. every network that has no file named after its id in `storage` yet
         is downloaded into such a file,
      3. the list itself is written to "networks.json" inside `storage`.
    This function never returns.
    """
    argv = sys.argv
    secret = argv[argv.index("--secret") + 1]
    run = argv[argv.index("--run") + 1]
    commandHost = argv[argv.index("--command") + 1]

    listUrl = commandHost + "/api/networks/list/" + run
    downloadUrlPrefix = commandHost + "/api/networks/download/"
    listPath = os.path.join(storage, "networks.json")

    while True:
        networks = requestJson(listUrl, secret)

        for network in networks:
            spath = os.path.join(storage, network["id"])
            if os.path.exists(spath):
                # already downloaded in an earlier pass
                continue
            netbytes = requestBytes(downloadUrlPrefix + network["id"], secret)
            storeFileUnderPath(spath, netbytes)
            logMsg("Downloaded a new network to %s" % spath)

        writeJsonFile(listPath, networks)
        time.sleep(2)
def test_create_run(self):
    """
    A posted run must show up unchanged in the list of runs and must be
    retrievable by its id.

    The fields id, name, config and sha of the posted run are compared with
    the only entry of the runs list and with the result of the by-id request.
    """
    comparedFields = ("id", "name", "config", "sha")

    rundata = self.postARun()

    runsList = requestJson(urlBase + "api/runs", config["secret"], retries=1)
    self.assertEqual(len(runsList), 1)
    for field in comparedFields:
        self.assertEqual(runsList[0][field], rundata[field])

    # The list request above joins urlBase and the path without a leading slash,
    # so the by-id request does the same. It used to add another "/", which
    # produced a ".../​/api/runs/<id>" style url with an empty path segment.
    post = requestJson(urlBase + "api/runs/" + rundata["id"], config["secret"], retries=1)
    for field in comparedFields:
        self.assertEqual(rundata[field], post[field])
# procNetwork: body of the league management network thread (see its first log message).
#
# Flow, as far as the statements below show it:
#   1. Sleep in one second steps until self.run is no longer None.
#   2. Start the players proxy via tryPlayersProxyProcess(self.commandHost, self.secret),
#      wait a second and poll() the returned process once (the result is not used).
#   3. Loop forever:
#        - drain self.pendingReports with pop() into dicts with the keys
#          "p1", "p2", "winner", "policy" (report[0] .. report[3]); when
#          self.reportNovelty is set, "hashes" (report[4]) is added as well.
#        - if anything was collected, post it either to
#          self.noveltyHost + "/report/" + self.run (novelty reporting) or to the
#          local players proxy at http://127.0.0.1:1337/players/<run>.
#        - refresh self.playerList from the local players proxy, print the first
#          element of its first two entries, sleep two seconds.
#      Any exception is printed and logged and exit(-1) is called.
#
# NOTE(review): this definition has lost its line breaks and indentation, so it cannot
# be told from here whether rDict["rewardOnlyWinners"] is set only together with
# "hashes" (inside the reportNovelty branch) or for every report - confirm against
# the original file before reformatting.
# NOTE(review): exit(-1) raises SystemExit in the calling thread only; if this really
# runs in a non-main thread the worker process presumably keeps running, although
# the log message announces that the worker quits - confirm the intent.
def procNetwork(self): logMsg("Started league management network thread!") while self.run is None: time.sleep(1) logMsg("League management got run!") proxyProc = tryPlayersProxyProcess(self.commandHost, self.secret) time.sleep(1) proxyProc.poll() while True: try: pendingReportDicts = [] while len(self.pendingReports) > 0: report = self.pendingReports.pop() rDict = dict() rDict["p1"] = report[0] rDict["p2"] = report[1] rDict["winner"] = report[2] rDict["policy"] = report[3] if self.reportNovelty: rDict["hashes"] = report[4] rDict["rewardOnlyWinners"] = self.rewardOnlyWinners pendingReportDicts.append(rDict) if len(pendingReportDicts) > 0: if self.reportNovelty: nurl = self.noveltyHost + "/report/" + self.run postJson(nurl, self.secret, pendingReportDicts) else: # call the players_proxy to reduce the number of http requests to the actual command server postJson("http://127.0.0.1:1337/players/" + self.run, self.secret, pendingReportDicts) # call the players_proxy to reduce the number of http requests to the actual command server self.playerList = requestJson( "http://127.0.0.1:1337/players/" + self.run, self.secret) print(self.playerList[0][0], self.playerList[1][0]) time.sleep(2) except Exception as error: print("Problem in PlayersSelfplay!", error) logMsg( "something bad happened to the league network thread, quitting worker!!!" ) exit(-1)
def test_network_posting(self):
    """
    A posted network must be listed for its run and must come back unchanged.

    A policy is stored and uploaded, then downloaded again. The forward result
    on a position after two random moves is recorded for the original policy,
    for the policy after reset() and for the policy after loading the
    downloaded data. Loading has to restore both the UUID and both parts of
    the forward result, while the reset policy has to differ in both parts.
    """
    run = self.postARun()

    optimizerArgs = {"lr": 0.001, "weight_decay": 0.0001}
    policy = PytorchPolicy(32, 1, 32, 3, 64, Connect4GameState(7, 6, 4), "cuda:0",
                           "torch.optim.adamw.AdamW", optimizerArgs)

    postBytes(urlBase + "api/networks/" + run["id"] + "/" + policy.getUUID(),
              config["secret"],
              encodeToBson(policy.store()),
              retries=1)

    networkList = requestJson(urlBase + "api/networks/list/" + run["id"], config["secret"], retries=1)
    self.assertEqual(len(networkList), 1)
    self.assertEqual(networkList[0]["id"], policy.getUUID())

    downloadedBytes = requestBytes(urlBase + "api/networks/download/" + policy.getUUID(),
                                   config["secret"],
                                   retries=1)
    redownloaded = decodeFromBson(downloadedBytes)

    # a position two random moves into the game
    game = Connect4GameState(7, 6, 4)
    for _ in range(2):
        game = game.playMove(np.random.randint(7))

    def forwardGame():
        # isRandom is switched off before every forward pass
        policy.isRandom = False
        return policy.forward([game])[0]

    forwardResultPre = forwardGame()
    preUUID = policy.getUUID()

    policy.reset()
    forwardResultReset = forwardGame()

    policy.load(redownloaded)
    forwardResultPost = forwardGame()

    self.assertEqual(preUUID, policy.getUUID())

    for part in (0, 1):
        self.assertTrue(np.all(forwardResultPre[part] == forwardResultPost[part]))

    for part in (0, 1):
        self.assertFalse(np.all(forwardResultPre[part] == forwardResultReset[part]))
def pollWork(self):
    """
    Poll the command server for work packages and append them to self.workQueue.

    Runs forever. Each pass:
      - waits (in 50ms steps) while more than one package is queued, or while
        exactly one package is queued and the time since the later of the last
        successful checkout and self.lastIterationCompleted exceeds 80% of the
        mean of self.iterateTimes,
      - requests the queue of open tasks and tries to check out a random one
        of the first five,
      - decodes the checked out package into game states via
        self.initialState.load and appends {"work": games, "id": taskId} to
        self.workQueue.
    A failed checkout is logged and retried after a short random pause. An
    empty server queue is logged once (until work is found again) and polled
    again after half a second.
    """
    logMsg("Started work poll thread")
    lastSuccess = time.monotonic()
    while True:
        while (len(self.workQueue) == 1 and
               (time.monotonic() - max(lastSuccess, self.lastIterationCompleted)) >
               np.mean(self.iterateTimes) * 0.8) or len(self.workQueue) > 1:
            time.sleep(0.05)

        workList = requestJson(self.command + "/queue", "")
        if len(workList) > 0:
            # picking randomly among the first few tasks lowers the chance that
            # several workers race for the very same task
            pickWork = random.choice(workList[:5])
            try:
                myWork = requestBytes(self.command + "/checkout/" + pickWork, "", retries=0)
            except Exception:
                # somebody else took the work before us.
                # Exception instead of a bare except: SystemExit and
                # KeyboardInterrupt must not be mistaken for a lost race.
                logMsg("Failed to checkout a task %s" % pickWork)
                time.sleep(0.3 + random.random() * 0.2)
                continue

            # decodedWork should be a list of game.store(), so load them via game.load()
            decodedWork = decodeFromBson(myWork)
            games = [self.initialState.load(w) for w in decodedWork]

            self.printNoWork = True
            logMsg("Got work: %i game states" % len(games))

            dwork = dict()
            dwork["work"] = games
            dwork["id"] = pickWork
            self.workQueue.append(dwork)
            lastSuccess = time.monotonic()
        else:
            if self.printNoWork:
                logMsg("No work found on the server, will keep trying...")
                # stay quiet until work has been found again
                self.printNoWork = False
            time.sleep(0.5)
setproctitle.setproctitle("x0_distributed_setup") hasArgs = ("--secret" in sys.argv) and ("--run" in sys.argv) and ("--command" in sys.argv) if not hasArgs: raise Exception( "You need to provide arguments for the distributed worker: --secret <server password>, --run <uuid> and --command <command server host>!" ) secret = sys.argv[sys.argv.index("--secret") + 1] run = sys.argv[sys.argv.index("--run") + 1] commandHost = sys.argv[sys.argv.index("--command") + 1] runConfig = requestJson(commandHost + "/api/runs/" + run, secret) if "--fconfig" in sys.argv: # meant for frametime evaluator, not for general use. cfgPath = sys.argv[sys.argv.index("--fconfig") + 1] logMsg("Forced to use local file configuration %s" % cfgPath) else: cfgPath = os.path.join(os.getcwd(), "downloaded_config.yaml") with open(cfgPath, "w") as f: f.write(runConfig["config"]) logMsg("Downloaded configuration to", cfgPath) logMsg("Running configuration") core = mlConfigBasedMain(cfgPath) if "--training" in sys.argv:
def getNetworkList(self):
    """
    Request the list of networks of the run self.runId from the command server.

    Returns the result of requestJson for the "/api/networks/list/" endpoint.
    """
    listUrl = self.commandHost + "/api/networks/list/" + self.runId
    return requestJson(listUrl, self.secret)
def getNextWork():
    """
    Block until the command server lists at least one evaluation.

    The "/api/evaluations/" endpoint is asked every 15 seconds until it returns
    a non-empty list. Returns the tuple (run, network) taken from the first
    entry of that list.
    """
    nextWork = requestJson(commandHost + "/api/evaluations/", secret)
    while len(nextWork) == 0:
        time.sleep(15)
        nextWork = requestJson(commandHost + "/api/evaluations/", secret)

    return nextWork[0]["run"], nextWork[0]["network"]
with getRunConfig(run, commandHost, secret) as temp: core = loadMlConfig(temp.name) try: policy = core.worker.policy(recursive=True) except: policy = core.evalWorker.policy(recursive=True) unpackedNetwork = networks.downloadNetwork(network) policy.load(unpackedNetwork) logMsg("Evaluation: Loaded policy with id", policy.getUUID()) # this will be an empty dict for runs without a player-league, which will then have no effect bestPlayer = requestJson( commandHost + "/api/bestplayer/" + policy.getUUID(), secret) print("Got best player", bestPlayer) try: policyIterator = core.worker.policyIterator(recursive=True) except: policyIterator = core.evalWorker.policyIterator(recursive=True) # pick the best moves moveDecider moveDecider = TemperatureMoveDecider(-1) initialState = core.worker.initialState(recursive=True) networkPlayer = PolicyPlayer(policy, None, moveDecider) fullPlayer = PolicyIteratorPlayer(policy,
def test_state_posting(self):
    """
    State packages posted for two runs must be listed per run and must come
    back unchanged.

    123 reports are posted as worker "test" for the first run and 456 reports
    as worker "test2" for the second run. Each run has to list exactly its own
    package with matching id, worker name and package size, and the downloaded
    packages have to equal the posted ones entry by entry.
    """
    run1 = self.postARun()
    run2 = self.postARun()

    states1 = [makeReport() for _ in range(123)]
    states2 = [makeReport() for _ in range(456)]

    def postStates(url, states):
        # upload one bson encoded package; the json answer is compared with the
        # id of the listed package further down
        response = requests.post(url=url,
                                 data=encodeToBson(states),
                                 headers={"secret": config["secret"]})
        return response.json()

    def downloadStates(reportId):
        rawPackage = requestBytes(urlBase + "api/state/download/" + reportId,
                                  config["secret"],
                                  retries=1)
        return decodeFromBson(rawPackage)

    report1Id = postStates(urlBase + "api/state/test/" + run1["id"], states1)
    report2Id = postStates(urlBase + "api/state/test2/" + run2["id"], states2)

    listRun1 = requestJson(urlBase + "api/state/list/" + run1["id"], config["secret"], retries=1)
    listRun2 = requestJson(urlBase + "api/state/list/" + run2["id"], config["secret"], retries=1)

    self.assertEqual(len(listRun1), 1)
    self.assertEqual(len(listRun2), 1)

    listed1 = listRun1[0]
    listed2 = listRun2[0]

    self.assertEqual(listed1["id"], report1Id)
    self.assertEqual(listed2["id"], report2Id)

    self.assertEqual(listed1["worker"], "test")
    self.assertEqual(listed2["worker"], "test2")

    self.assertEqual(listed1["packageSize"], len(states1))
    self.assertEqual(listed2["packageSize"], len(states2))

    states1Downloaded = downloadStates(report1Id)
    states2Downloaded = downloadStates(report2Id)

    self.assertEqual(len(states1), len(states1Downloaded))
    self.assertEqual(len(states2), len(states2Downloaded))

    def checkStatesEqual(statesPre, statesPost):
        for pre, post in zip(statesPre, statesPost):
            # "policyIterated" and "state" are compared with numpy, so they are
            # taken out of the dicts before the plain dict comparison
            piPre = pre.pop("policyIterated")
            piPost = post.pop("policyIterated")
            stPre = pre.pop("state")
            stPost = post.pop("state")

            self.assertDictEqual(pre, post)

            self.assertTrue(np.all(piPre == piPost))
            self.assertTrue(np.all(stPre == stPost))

    checkStatesEqual(states1, states1Downloaded)
    checkStatesEqual(states2, states2Downloaded)
def plotGroup(name, fig, ax, ax2, extraStats=None):
    """
    Plot accuracy over cost for all runs of the group `name`.

    For every run of groups[name] the "/costs/" data is requested and the
    points that have both a cost and an "acc_mcts_moves" value are kept. An
    optional "extraCost" of the group (divided by 3600) is added to every cost.
    With more than one run the individual runs are drawn as thin whitened
    lines (unless the group sets a truthy "onlyMean") plus the line returned
    by meanInterpolatedLine; a single run is drawn directly.

    If extraStats is given, the "/tables/<extraStats>/" data of every run is
    drawn in the same manner as dashed lines on ax2.

    name: key into the module level `groups` dict.
    fig: not used by this function.
    ax: axes for the accuracy lines.
    ax2: axes for the extraStats lines, only used if extraStats is given.
    extraStats: name of the table and of the value inside its rows, or None.

    Returns (mostLeft, mostRight): the first cost scaled by 0.99 and the last
    cost scaled by 1.01 of the drawn (mean) accuracy line.

    Raises ValueError if the group has no runs.
    """
    group = groups[name]
    runs = group["runs"]
    color = group["color"]

    extraCosts = 0
    if "extraCost" in group:
        extraCosts = group["extraCost"] / 3600.0

    datas = []
    extras = []

    for run in runs:
        data = requestJson(command + "/costs/" + run, "")
        costs = []
        accs = []
        for d in data:
            cost = d["cost"]
            acc = d["acc_mcts_moves"]
            if cost is not None and acc is not None:
                costs.append(cost + extraCosts)
                accs.append(acc)
        datas.append((costs, accs))

        if extraStats is not None:
            edata = requestJson(command + "/tables/" + extraStats + "/" + run, "")
            costs = []
            extraVals = []
            for ed in edata:
                costs.append(ed["cost"] + extraCosts)
                extraVals.append(ed[extraStats])
            extras.append((costs, extraVals))

    if len(datas) > 1:
        if not group.get("onlyMean"):
            for cost, acc in datas:
                ax.plot(cost, acc, color=whitenColor(color, 0.3), linewidth=1)
        # the second returned sequence is used as x axis (costs), the first as y axis
        meanX, meanY = meanInterpolatedLine(datas)
        ax.plot(meanY, meanX, label=name, color=color + (1, ), linewidth=2)
        mostLeft = meanY[0] * 0.99
        mostRight = meanY[-1] * 1.01
    elif len(datas) == 1:
        runCosts, runAccs = datas[0]
        mostLeft = runCosts[0] * 0.99
        # the right border is the LAST cost of the run; this used the first
        # cost, which collapsed the range to 0.99 .. 1.01 times the first cost
        mostRight = runCosts[-1] * 1.01
        ax.plot(runCosts, runAccs, color=whitenColor(color, 0.9), linewidth=1, label=name)
    else:
        # without this the function failed with an UnboundLocalError on return
        raise ValueError("Group %s has no runs to plot" % name)

    if extraStats is not None:
        if len(extras) > 1:
            for cost, acc in extras:
                ax2.plot(cost, acc, "--", color=whitenColor(color, 0.3), linewidth=1)
            meanX, meanY = meanInterpolatedLine(extras)
            ax2.plot(meanY, meanX, "--", label=name, color=color + (1, ), linewidth=2)
        elif len(extras) == 1:
            ax2.plot(extras[0][0],
                     extras[0][1],
                     "--",
                     label=name,
                     color=whitenColor(color, 0.9),
                     linewidth=1)

    return mostLeft, mostRight
def getRunConfig(runId, networkFile, isTreeSelfPlayAr):
    """
    Download the configuration of a run and rewrite it for a local run.

    The downloaded yaml configuration is modified so that
      - games are reported to a "NoopGameReporter",
      - the policy is loaded from networkFile by a "FilePolicyUpdater",
      - a worker that uses "evalAccess" is replaced by a "LinearSelfPlayWorker"
        built from the initialState of the worker and the policy and
        policyIterator of the evalWorker; its gameCount is the worker's
        maxPackageSize if present, else its batchSize.
    The download is retried every 15 seconds until it succeeds.

    runId: id of the run whose configuration is requested.
    networkFile: path handed to the FilePolicyUpdater.
    isTreeSelfPlayAr: not used by the active code of this function.

    Returns the open NamedTemporaryFile (suffix ".yaml") that holds the
    rewritten configuration; its path is available as .name. Use it as
    "with getRunConfig(...) as configFile:".
    """
    while True:
        try:
            runConfig = requestJson(commandHost + "/api/runs/" + runId, secret)["config"]
            break
        except Exception as error:
            logMsg("Could not get run configuration for run, will try again soon", error)
            time.sleep(15)

    grkey = "noopGameReporter23233"
    plkey = "filePolicyLoader1212125"
    mdkey = "moveDeciderTEMPTEMP123"

    # yaml.load without an explicit Loader is rejected by PyYAML >= 6 and could
    # construct arbitrary python objects in older versions. The configuration is
    # only edited as nested dicts and dumped again, so safe_load is used.
    # NOTE(review): assumes run configurations do not use python specific yaml tags.
    editConfig = yaml.safe_load(runConfig)

    editConfig[grkey] = {"name": "NoopGameReporter"}
    editConfig[plkey] = {"name": "FilePolicyUpdater", "path": networkFile}
    editConfig["worker"]["gameReporter"] = "$" + grkey
    editConfig[mdkey] = {"name": "TemperatureMoveDecider", "explorationPlyCount": 30}

    if "evalAccess" in editConfig["worker"]:
        # for any form of eval access self play,
        # rewrite the config to use a normal self play worker, it doesn't really matter what code does the evaluation...
        # (an earlier variant that configured a LocalEvaluationAccess instead is disabled)
        prevWorker = editConfig["worker"]
        evalWorker = editConfig["evalWorker"]

        if "maxPackageSize" in prevWorker:
            gameCount = prevWorker["maxPackageSize"]
        else:
            gameCount = prevWorker["batchSize"]

        editConfig["worker"] = {
            "name": "LinearSelfPlayWorker",
            "initialState": prevWorker["initialState"],
            "policy": evalWorker["policy"],
            "policyIterator": evalWorker["policyIterator"],
            "gameCount": gameCount,
            "moveDecider": "$" + mdkey,
            # this already is the noop game reporter set above
            "gameReporter": prevWorker["gameReporter"],
            "policyUpdater": "$" + plkey,
        }
    else:
        editConfig["worker"]["policyUpdater"] = "$" + plkey

    ff = tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+")
    try:
        yaml.dump(editConfig, ff)
        ff.flush()
    except Exception:
        # do not leave the temporary file behind if it could not be written
        ff.close()
        raise

    logMsg("Using tempfile for configuration:", ff.name)
    return ff
"drawValue": (0, 1), "fpu": (0, 1) } optimizer = BayesianOptimization( f = scoreFunc, pbounds = pbounds, random_state= 1 ); logger = CombinedLogger("player_opt_logs.json") optimizer.subscribe(Events.OPTIMIZATION_STEP, logger) #get the best players of the given network, if any if networkId is not None: bestPlayer = requestJson(commandHost + "/api/bestplayer/" + networkId, secret) if len(bestPlayer) > 0: logMsg("Probing best player of the network!", bestPlayer) if "alphaBase" in bestPlayer: del bestPlayer["alphaBase"] optimizer.probe( params=bestPlayer ) # the known "good" parameters of the first hyperopt run logMsg("Probing known good parameters!") optimizer.probe( params={"cpuct": 1.545, "drawValue": 0.6913, "fpu": 0.8545} ) optimizer.maximize(