Ejemplo n.º 1
0
 def queryPlayerList(self, runId):
     """
     Request a player list from the command server.

     If self.forPolicy is set, the /api/netplayers/ endpoint is queried with
     that value and runId is not used. Otherwise the /api/league/players/
     endpoint is queried for the given runId.

     Returns whatever requestJson yields for the chosen URL.
     """
     policyId = self.forPolicy
     if policyId is not None:
         url = self.command + "/api/netplayers/" + policyId
     else:
         url = self.command + "/api/league/players/" + runId
     return requestJson(url, self.secret)
Ejemplo n.º 2
0
    def run(self):
        """
        Poll the server for state packages and download the ones not yet known.

        Runs while self.running is true. Each round lists the packages of
        self.runId, downloads every package whose id is not a key of
        self.downloadedStatesObject (newest "creation" first), stores it in
        self.storageDirectory and updates the bookkeeping attributes.
        Any error is logged and followed by an extra 10 second pause; every
        round ends with a 5 second pause.
        """
        logMsg("Starting states downloader, storing files in", self.storageDirectory)

        def creationOf(entry):
            return entry["creation"]

        while self.running:
            try:
                # ask the server which state packages currently exist for this run
                remoteEntries = requestJson(self.commandHost + "/api/state/list/" + self.runId, self.secret)

                # sumNewStates is only consumed by the disabled log message below
                sumNewStates = 0
                newEntries = []
                for remoteEntry in remoteEntries:
                    if remoteEntry["id"] in self.downloadedStatesObject:
                        continue
                    newEntries.append(remoteEntry)
                    sumNewStates += remoteEntry["packageSize"]

                # newest packages are the most interesting, so fetch them first
                newEntries.sort(key=creationOf, reverse=True)

                #logMsg("Found %i new state packages with %i states on the server!" % (len(newEntries), sumNewStates))

                for newEntry in newEntries:
                    entryId = newEntry["id"]
                    statesData = requestBytes(self.commandHost + "/api/state/download/" + entryId, self.secret)
                    storeFileUnderPath(os.path.join(self.storageDirectory, entryId), statesData)

                    # record the package and keep the history ordered newest first
                    self.downloadedStatesObject[entryId] = newEntry
                    self.downloadedStatesHistory.append(newEntry)
                    self.downloadedStatesHistory.sort(key=creationOf, reverse=True)
                    self.history = self.downloadedStatesHistory.copy()
                    self.numStatesAvailable += newEntry["packageSize"]
                    self.store()

            except Exception as error:
                logMsg("Could not download states, will try again soon", error)
                time.sleep(10)

            time.sleep(5)
Ejemplo n.º 3
0
def getRunConfig(runId, commandHost, secret):
    """
    Download the configuration of a run into a temporary ".yaml" file.

    The request is retried every 15 seconds until it succeeds. The return
    value is the open NamedTemporaryFile object itself (its path is
    available as .name), so it can be used as
    "with getRunConfig(...) as configFile:".
    """
    while True:
        try:
            runInfo = requestJson(commandHost + "/api/runs/" + runId, secret)
            runConfig = runInfo["config"]
        except Exception as error:
            logMsg(
                "Could not get run configuration for run, will try again soon",
                error)
            time.sleep(15)
        else:
            break

    configFile = tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+")
    configFile.write(runConfig)
    # flush so that readers opening the file by name see the full content
    configFile.flush()

    logMsg("Using tempfile for configuration:", configFile.name)

    return configFile
Ejemplo n.º 4
0
def loopNetworksDownload(storage):
    """
    Mirror the networks of a run into the directory `storage`, forever.

    The secret, run id and command host are read from the command line
    (--secret, --run, --command). Every two seconds the list of networks is
    requested, every network without a file named after its id in `storage`
    is downloaded, and the list is written to "networks.json" in `storage`.
    This function never returns.
    """

    def argValue(flag):
        # the value of a flag is the argument that follows it
        return sys.argv[sys.argv.index(flag) + 1]

    secret = argValue("--secret")
    run = argValue("--run")
    commandHost = argValue("--command")

    while True:
        networks = requestJson(commandHost + "/api/networks/list/" + run,
                               secret)

        for network in networks:
            targetPath = os.path.join(storage, network["id"])
            if os.path.exists(targetPath):
                # already downloaded in an earlier round
                continue
            payload = requestBytes(
                commandHost + "/api/networks/download/" + network["id"],
                secret)
            storeFileUnderPath(targetPath, payload)
            logMsg("Downloaded a new network to %s" % targetPath)

        # persist the list only after all listed networks are present on disk
        writeJsonFile(os.path.join(storage, "networks.json"), networks)

        time.sleep(2)
Ejemplo n.º 5
0
    def test_create_run(self):
        """
        A posted run must appear as the only entry of the run list and must be
        retrievable by its id, with matching id, name, config and sha.
        """
        rundata = self.postARun()
        comparedFields = ("id", "name", "config", "sha")

        runsList = requestJson(urlBase + "api/runs",
                               config["secret"],
                               retries=1)

        self.assertEqual(len(runsList), 1)
        for field in comparedFields:
            self.assertEqual(runsList[0][field], rundata[field])

        # Join urlBase the same way as the list request above (no leading
        # slash), otherwise the URL contains a double slash.
        singleRun = requestJson(urlBase + "api/runs/" + rundata["id"],
                                config["secret"],
                                retries=1)

        for field in comparedFields:
            self.assertEqual(rundata[field], singleRun[field])
Ejemplo n.º 6
0
    def procNetwork(self):
        """
        Body of the league management network thread.

        Waits until self.run is set, starts the players proxy process and then
        loops forever. Each round drains self.pendingReports into a list of
        dicts and posts it (to self.noveltyHost when self.reportNovelty is set,
        otherwise to the local players proxy), then refreshes self.playerList
        from the players proxy.

        Errors inside a round are printed and the loop continues. Every round,
        failed or not, is followed by a 2 second pause so that a persistent
        failure does not turn into a busy loop.
        """
        logMsg("Started league management network thread!")
        while self.run is None:
            time.sleep(1)
        logMsg("League management got run!")
        proxyProc = tryPlayersProxyProcess(self.commandHost, self.secret)
        time.sleep(1)
        proxyProc.poll()

        while True:
            try:
                # NOTE(review): reports are removed from self.pendingReports
                # before they are posted, so they are dropped if the post fails.
                pendingReportDicts = []
                while len(self.pendingReports) > 0:
                    report = self.pendingReports.pop()
                    rDict = {
                        "p1": report[0],
                        "p2": report[1],
                        "winner": report[2],
                        "policy": report[3],
                    }
                    if self.reportNovelty:
                        rDict["hashes"] = report[4]
                        rDict["rewardOnlyWinners"] = self.rewardOnlyWinners
                    pendingReportDicts.append(rDict)

                if len(pendingReportDicts) > 0:
                    if self.reportNovelty:
                        nurl = self.noveltyHost + "/report/" + self.run
                        postJson(nurl, self.secret, pendingReportDicts)
                    else:
                        # call the players_proxy to reduce the number of http requests to the actual command server
                        postJson("http://127.0.0.1:1337/players/" + self.run,
                                 self.secret, pendingReportDicts)

                # call the players_proxy to reduce the number of http requests to the actual command server
                self.playerList = requestJson(
                    "http://127.0.0.1:1337/players/" + self.run, self.secret)

                print(self.playerList[0][0], self.playerList[1][0])

            except Exception as error:
                print("Problem in PlayersSelfplay!", error)

            # pause after successful and failed rounds alike
            time.sleep(2)

        # NOTE(review): unreachable, the loop above has no break. Kept in case
        # an exit condition is meant to be added.
        logMsg(
            "something bad happened to the league network thread, quitting worker!!!"
        )
        exit(-1)
Ejemplo n.º 7
0
    def test_network_posting(self):
        """
        A posted network must be listed for its run and, once downloaded
        and loaded again, must reproduce the forward results and UUID of the
        original policy, while a reset policy must produce different results.
        """
        run = self.postARun()
        secret = config["secret"]

        policy = PytorchPolicy(32, 1, 32, 3, 64, Connect4GameState(7, 6, 4),
                               "cuda:0", "torch.optim.adamw.AdamW", {
                                   "lr": 0.001,
                                   "weight_decay": 0.0001
                               })

        uploadUrl = urlBase + "api/networks/" + run["id"] + "/" + policy.getUUID()
        postBytes(uploadUrl, secret, encodeToBson(policy.store()), retries=1)

        networkList = requestJson(urlBase + "api/networks/list/" + run["id"],
                                  secret,
                                  retries=1)
        self.assertEqual(len(networkList), 1)
        self.assertEqual(networkList[0]["id"], policy.getUUID())

        downloadUrl = urlBase + "api/networks/download/" + policy.getUUID()
        redownloaded = decodeFromBson(
            requestBytes(downloadUrl, secret, retries=1))

        # a position two random moves into the game serves as the probe input
        game = Connect4GameState(7, 6, 4)
        for _ in range(2):
            game = game.playMove(np.random.randint(7))

        def forwardProbe():
            # isRandom is switched off before every forward pass
            policy.isRandom = False
            return policy.forward([game])[0]

        resultBefore = forwardProbe()
        uuidBefore = policy.getUUID()

        policy.reset()
        resultAfterReset = forwardProbe()

        policy.load(redownloaded)
        resultAfterLoad = forwardProbe()

        self.assertEqual(uuidBefore, policy.getUUID())

        # loading the downloaded network restores both outputs exactly ...
        for part in (0, 1):
            self.assertTrue(np.all(resultBefore[part] == resultAfterLoad[part]))

        # ... whereas the reset policy must not match them
        for part in (0, 1):
            self.assertFalse(np.all(resultBefore[part] == resultAfterReset[part]))
Ejemplo n.º 8
0
    def pollWork(self):
        """
        Body of the work poll thread: fetch work packages and queue them.

        Loops forever. Once the wait condition (see shouldWait) clears, the
        "/queue" endpoint is asked for available work ids, one of the first
        five is picked at random and checked out. The checked-out bytes are
        decoded, every entry is loaded via self.initialState.load, and a dict
        with "work" (the games) and "id" is appended to self.workQueue.
        """
        logMsg("Started work poll thread")
        lastSuccess = time.monotonic()

        def shouldWait():
            # Hold off polling while more than one package is queued, or while
            # exactly one is queued and the time since the last checkout or
            # completed iteration exceeds 80% of the mean iteration time.
            queued = len(self.workQueue)
            if queued > 1:
                return True
            if queued < 1:
                return False
            sinceProgress = time.monotonic() - max(lastSuccess, self.lastIterationCompleted)
            return sinceProgress > np.mean(self.iterateTimes) * 0.8

        while True:
            while shouldWait():
                time.sleep(0.05)

            workList = requestJson(self.command + "/queue", "")
            if len(workList) > 0:
                pickWork = random.choice(workList[:5])
                try:
                    myWork = requestBytes(self.command + "/checkout/" + pickWork, "", retries=0)
                except Exception:
                    # Most likely somebody else took the work before us.
                    # Only Exception is caught, so SystemExit and
                    # KeyboardInterrupt are not swallowed here.
                    logMsg("Failed to checkout a task %s" % pickWork)
                    time.sleep(0.3 + random.random() * 0.2)
                    continue

                # decodedWork should be a list of game.store(), so load them via game.load()
                decodedWork = decodeFromBson(myWork)
                games = [self.initialState.load(w) for w in decodedWork]

                # re-arm the "no work" message for the next idle phase
                self.printNoWork = True

                logMsg("Got work: %i game states" % len(games))
                self.workQueue.append({"work": games, "id": pickWork})
                lastSuccess = time.monotonic()
            else:
                # report an empty queue only once per idle phase
                if self.printNoWork:
                    logMsg("No work found on the server, will keep trying...")
                    self.printNoWork = False
                time.sleep(0.5)
Ejemplo n.º 9
0
    setproctitle.setproctitle("x0_distributed_setup")

    hasArgs = ("--secret" in sys.argv) and ("--run"
                                            in sys.argv) and ("--command"
                                                              in sys.argv)

    if not hasArgs:
        raise Exception(
            "You need to provide arguments for the distributed worker: --secret <server password>, --run <uuid> and --command <command server host>!"
        )

    secret = sys.argv[sys.argv.index("--secret") + 1]
    run = sys.argv[sys.argv.index("--run") + 1]
    commandHost = sys.argv[sys.argv.index("--command") + 1]

    runConfig = requestJson(commandHost + "/api/runs/" + run, secret)

    if "--fconfig" in sys.argv:
        # meant for frametime evaluator, not for general use.
        cfgPath = sys.argv[sys.argv.index("--fconfig") + 1]
        logMsg("Forced to use local file configuration %s" % cfgPath)
    else:
        cfgPath = os.path.join(os.getcwd(), "downloaded_config.yaml")
        with open(cfgPath, "w") as f:
            f.write(runConfig["config"])
        logMsg("Downloaded configuration to", cfgPath)
        logMsg("Running configuration")

    core = mlConfigBasedMain(cfgPath)

    if "--training" in sys.argv:
Ejemplo n.º 10
0
 def getNetworkList(self):
     """Return the result of the networks list request for self.runId."""
     listUrl = self.commandHost + "/api/networks/list/" + self.runId
     return requestJson(listUrl, self.secret)
Ejemplo n.º 11
0
 def getNextWork():
     """
     Block until the server offers an evaluation and return it.

     Polls the "/api/evaluations/" endpoint every 15 seconds until the answer
     is non-empty, then returns the ("run", "network") pair of its first entry.
     """
     while True:
         pending = requestJson(commandHost + "/api/evaluations/", secret)
         if len(pending) == 0:
             time.sleep(15)
             continue
         return pending[0]["run"], pending[0]["network"]
Ejemplo n.º 12
0
        with getRunConfig(run, commandHost, secret) as temp:
            core = loadMlConfig(temp.name)

        try:
            policy = core.worker.policy(recursive=True)
        except:
            policy = core.evalWorker.policy(recursive=True)

        unpackedNetwork = networks.downloadNetwork(network)
        policy.load(unpackedNetwork)

        logMsg("Evaluation: Loaded policy with id", policy.getUUID())

        # this will be an empty dict for runs without a player-league, which will then have no effect
        bestPlayer = requestJson(
            commandHost + "/api/bestplayer/" + policy.getUUID(), secret)

        print("Got best player", bestPlayer)

        try:
            policyIterator = core.worker.policyIterator(recursive=True)
        except:
            policyIterator = core.evalWorker.policyIterator(recursive=True)

        # pick the best moves moveDecider
        moveDecider = TemperatureMoveDecider(-1)

        initialState = core.worker.initialState(recursive=True)

        networkPlayer = PolicyPlayer(policy, None, moveDecider)
        fullPlayer = PolicyIteratorPlayer(policy,
Ejemplo n.º 13
0
    def test_state_posting(self):
        """
        State packages posted for two different runs must each be listed only
        under their own run, carry the posting worker name and package size,
        and download back with the same content.
        """
        run1 = self.postARun()
        run2 = self.postARun()

        states1 = [makeReport() for _ in range(123)]
        states2 = [makeReport() for _ in range(456)]

        def postStates(workerPath, runInfo, states):
            # returns the decoded JSON answer of the post, used below as the package id
            return requests.post(url=urlBase + workerPath + runInfo["id"],
                                 data=encodeToBson(states),
                                 headers={
                                     "secret": config["secret"]
                                 }).json()

        def listStates(runInfo):
            return requestJson(urlBase + "api/state/list/" + runInfo["id"],
                               config["secret"],
                               retries=1)

        def downloadStates(reportId):
            return decodeFromBson(
                requestBytes(urlBase + "api/state/download/" + reportId,
                             config["secret"],
                             retries=1))

        report1Id = postStates("api/state/test/", run1, states1)
        report2Id = postStates("api/state/test2/", run2, states2)

        listRun1 = listStates(run1)
        listRun2 = listStates(run2)

        for listing in (listRun1, listRun2):
            self.assertEqual(len(listing), 1)

        for listing, expectedId in ((listRun1, report1Id), (listRun2, report2Id)):
            self.assertEqual(listing[0]["id"], expectedId)

        for listing, expectedWorker in ((listRun1, "test"), (listRun2, "test2")):
            self.assertEqual(listing[0]["worker"], expectedWorker)

        for listing, posted in ((listRun1, states1), (listRun2, states2)):
            self.assertEqual(listing[0]["packageSize"], len(posted))

        states1Downloaded = downloadStates(report1Id)
        states2Downloaded = downloadStates(report2Id)

        self.assertEqual(len(states1), len(states1Downloaded))
        self.assertEqual(len(states2), len(states2Downloaded))

        def checkStatesEqual(statesPre, statesPost):
            for pre, post in zip(statesPre, statesPost):
                # Take the array-valued fields out of the dicts (this mutates
                # them) so the remainder can go through assertDictEqual;
                # the arrays are compared element-wise afterwards.
                piPre = pre.pop("policyIterated")
                piPost = post.pop("policyIterated")
                stPre = pre.pop("state")
                stPost = post.pop("state")

                self.assertDictEqual(pre, post)

                self.assertTrue(np.all(piPre == piPost))
                self.assertTrue(np.all(stPre == stPost))

        checkStatesEqual(states1, states1Downloaded)
        checkStatesEqual(states2, states2Downloaded)
Ejemplo n.º 14
0
def plotGroup(name, fig, ax, ax2, extraStats=None):
    """
    Plot the accuracy-over-cost curves of all runs of the group `name`.

    For every run of groups[name] the "/costs/" table is requested and the
    ("cost", "acc_mcts_moves") pairs without None values are plotted on `ax`.
    With several runs the individual curves are drawn faintly (unless the
    group sets a truthy "onlyMean") together with the mean line from
    meanInterpolatedLine; a single run is plotted directly.

    If `extraStats` is given, the "/tables/<extraStats>/" table of every run
    is plotted the same way on `ax2` with dashed lines.

    `fig` is accepted for interface compatibility and is not used.

    Returns (mostLeft, mostRight): the first cost value scaled by 0.99 and
    the last cost value scaled by 1.01.

    Raises ValueError if the group has no runs.
    """
    group = groups[name]
    runs = group["runs"]
    color = group["color"]

    if len(runs) == 0:
        # without runs there is nothing to plot and no x-range to return
        raise ValueError("Group %r has no runs to plot" % (name, ))

    # optional constant offset added to every cost value, scaled by 1/3600
    # (presumably seconds to hours, confirm against the data source)
    extraCosts = 0
    if "extraCost" in group:
        extraCosts = group["extraCost"] / 3600.0

    datas = []
    extras = []

    for run in runs:
        data = requestJson(command + "/costs/" + run, "")
        costs = []
        accs = []
        for d in data:
            cost = d["cost"]
            acc = d["acc_mcts_moves"]
            if cost is not None and acc is not None:
                costs.append(cost + extraCosts)
                accs.append(acc)
        datas.append((costs, accs))

        if extraStats is not None:
            edata = requestJson(command + "/tables/" + extraStats + "/" + run,
                                "")
            costs = []
            extraVals = []
            for ed in edata:
                costs.append(ed["cost"] + extraCosts)
                extraVals.append(ed[extraStats])
            extras.append((costs, extraVals))

    if len(datas) > 1:
        if not group.get("onlyMean"):
            for cost, acc in datas:
                ax.plot(cost, acc, color=whitenColor(color, 0.3), linewidth=1)

        # the second value returned by meanInterpolatedLine is used as the x-axis
        meanAcc, meanCost = meanInterpolatedLine(datas)
        ax.plot(meanCost, meanAcc, label=name, color=color + (1, ), linewidth=2)

        mostLeft = meanCost[0] * 0.99
        mostRight = meanCost[-1] * 1.01
    else:
        singleCosts, singleAccs = datas[0]
        mostLeft = singleCosts[0] * 0.99
        # use the last cost value, matching the multi-run branch above
        mostRight = singleCosts[-1] * 1.01
        ax.plot(singleCosts,
                singleAccs,
                color=whitenColor(color, 0.9),
                linewidth=1,
                label=name)

    if extraStats is not None:
        if len(extras) > 1:
            for cost, vals in extras:
                ax2.plot(cost,
                         vals,
                         "--",
                         color=whitenColor(color, 0.3),
                         linewidth=1)

            meanVals, meanCost = meanInterpolatedLine(extras)
            ax2.plot(meanCost,
                     meanVals,
                     "--",
                     label=name,
                     color=color + (1, ),
                     linewidth=2)
        elif len(extras) == 1:
            ax2.plot(extras[0][0],
                     extras[0][1],
                     "--",
                     label=name,
                     color=whitenColor(color, 0.9),
                     linewidth=1)

    return mostLeft, mostRight
Ejemplo n.º 15
0
    def getRunConfig(runId, networkFile, isTreeSelfPlayAr):
        """
        Download the config of a run, rewrite it and store it in a temp file.

        The download is retried every 15 seconds until it succeeds. The config
        is then modified so that:
          - the worker reports games to a "NoopGameReporter",
          - the policy is updated by a "FilePolicyUpdater" reading `networkFile`,
          - a worker that has an "evalAccess" entry is replaced by a
            "LinearSelfPlayWorker" assembled from the previous worker and the
            "evalWorker" section.

        `isTreeSelfPlayAr` is currently unused (it was only referenced by code
        that has since been disabled).

        Returns the open NamedTemporaryFile object holding the rewritten yaml
        (its path is available as .name).
        Use "with getRunConfig(...) as configFile:".
        """
        while True:
            try:
                runConfig = requestJson(commandHost + "/api/runs/" + runId, secret)["config"]
                break
            except Exception as error:
                logMsg("Could not get run configuration for run, will try again soon", error)
                time.sleep(15)

        # keys of the extra config entries; the odd suffixes make a collision
        # with existing keys of the run config unlikely
        grkey = "noopGameReporter23233"
        plkey = "filePolicyLoader1212125"
        mdkey = "moveDeciderTEMPTEMP123"

        # The config comes from the server, so parse it with the safe loader.
        # A bare yaml.load(text) without a Loader is deprecated and raises a
        # TypeError on PyYAML >= 6.
        editConfig = yaml.safe_load(runConfig)

        editConfig[grkey] = {"name": "NoopGameReporter"}
        editConfig[plkey] = {"name": "FilePolicyUpdater", "path": networkFile}
        editConfig[mdkey] = {
            "name": "TemperatureMoveDecider",
            "explorationPlyCount": 30,
        }

        # must be set before the worker is rebuilt below, which copies this value
        editConfig["worker"]["gameReporter"] = "$" + grkey

        if "evalAccess" in editConfig["worker"]:
            # for any form of eval access self play,
            # rewrite the config to use a normal self play worker, it doesn't really matter what code does the evaluation...
            # (An earlier variant that kept the eval access worker and pointed
            # it at a LocalEvaluationAccess has been disabled.)
            prevWorker = editConfig["worker"]
            evalWorker = editConfig["evalWorker"]

            if "maxPackageSize" in prevWorker:
                gameCount = prevWorker["maxPackageSize"]
            else:
                gameCount = prevWorker["batchSize"]

            editConfig["worker"] = {
                "name": "LinearSelfPlayWorker",
                "initialState": prevWorker["initialState"],
                "policy": evalWorker["policy"],
                "policyIterator": evalWorker["policyIterator"],
                "gameCount": gameCount,
                "moveDecider": "$" + mdkey,
                "gameReporter": prevWorker["gameReporter"],
                "policyUpdater": "$" + plkey,
            }
        else:
            editConfig["worker"]["policyUpdater"] = "$" + plkey

        # create the file only once the config is complete, so a failure above
        # does not leave an open temp file behind
        ff = tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+")
        yaml.dump(editConfig, ff)
        ff.flush()

        logMsg("Using tempfile for configuration:", ff.name)

        return ff
Ejemplo n.º 16
0
        "drawValue": (0, 1),
        "fpu": (0, 1)
    }

    optimizer = BayesianOptimization(
        f = scoreFunc,
        pbounds = pbounds,
        random_state= 1
    );

    logger = CombinedLogger("player_opt_logs.json")
    optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

    #get the best players of the given network, if any
    if networkId is not None:
        bestPlayer = requestJson(commandHost + "/api/bestplayer/" + networkId, secret)
        if len(bestPlayer) > 0:
            logMsg("Probing best player of the network!", bestPlayer)
            if "alphaBase" in bestPlayer:
                del bestPlayer["alphaBase"]
            optimizer.probe(
                params=bestPlayer
            )

    # the known "good" parameters of the first hyperopt run
    logMsg("Probing known good parameters!")
    optimizer.probe(
        params={"cpuct": 1.545, "drawValue": 0.6913, "fpu": 0.8545}
    )

    optimizer.maximize(