Example #1
def calc_itp(self, run):
    # Clamp the requested throughput to [1, 500].
    itp = run['args']['throughput']
    if itp < 1:
        itp = 1
    elif itp > 500:
        itp = 500
    # Scale by game length relative to the '10+0.1' reference TC,
    # and by the number of engine threads.
    itp *= math.sqrt(estimate_game_duration(run['args']['tc'])
                     / estimate_game_duration('10+0.1'))
    itp *= math.sqrt(run['args']['threads'])
    # Fixed-length runs get half weight; SPRT runs are boosted as the
    # log-likelihood ratio (llr) grows.
    if 'sprt' not in run['args']:
        itp *= 0.5
    else:
        llr = run['args']['sprt'].get('llr', 0)
        itp *= (5 + llr) / 5
    run['args']['itp'] = itp
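
All of these examples call fishtest's estimate_game_duration() helper. As a rough stand-in for experimenting with the snippets (the real helper also understands "moves/base" and "mm:ss" time controls and applies calibration factors), one can assume a plain "base+increment" TC string and a nominal 68 moves per game:

# Hypothetical stand-in for estimate_game_duration(); NOT the real
# fishtest implementation. Assumes a plain "base+increment" TC string.
def estimate_game_duration(tc):
    base, _, inc = tc.partition('+')
    return float(base) + 68 * float(inc or 0)

# With this stub, a run with throughput 100, tc '60+0.6', 1 thread and
# an SPRT at llr 0 would get:
#   itp = 100 * sqrt(100.8 / 16.8) * sqrt(1) * (5 + 0) / 5 ≈ 245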
Example #2
def homepage_results(request):
    # Calculate games_per_minute from current machines
    games_per_minute = 0.0
    machines = request.rundb.get_machines()
    for machine in machines:
        machine['last_updated'] = delta_date(machine['last_updated'])
        if machine['nps'] != 0:
            # Speed relative to a nominal 1.6 Mnps reference machine, times
            # the game completion rate of one game slot at this TC, times
            # the number of parallel game slots (cores // threads per game).
            games_per_minute += (
                (machine['nps'] / 1600000.0) *
                (60.0 / estimate_game_duration(machine['run']['args']['tc'])) *
                (int(machine['concurrency']) //
                 machine['run']['args'].get('threads', 1)))
    machines.reverse()
    # Get updated results for unfinished runs + finished runs
    (runs, pending_hours, cores,
     nps) = request.rundb.aggregate_unfinished_runs()
    return {
        **get_paginated_finished_runs(request),
        'runs': runs,
        'machines': machines,
        'pending_hours': '%.1f' % (pending_hours),
        'cores': cores,
        'nps': nps,
        'games_per_minute': int(games_per_minute),
    }
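
The games_per_minute sum multiplies three factors per machine: speed relative to a nominal reference machine (1,600,000 nps here; the variants below use 1,200,000 and 1,080,000), the game completion rate of one game slot at the run's time control, and the number of parallel game slots. A worked example, using the hypothetical duration stub from above:

# An 8-core worker at 2.4 Mnps on a 1-thread '10+0.1' run:
speed_factor = 2400000 / 1600000.0  # 1.5x the reference machine
slot_rate = 60.0 / 16.8             # ~3.57 games per minute per slot (stub)
slots = 8 // 1                      # 8 games running in parallel
print(int(speed_factor * slot_rate * slots))  # -> 42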
Example #3
def homepage_results(request):
    # Calculate games_per_minute from current machines
    games_per_minute = 0.0
    machines = request.rundb.get_machines()
    for machine in machines:
        machine["last_updated"] = delta_date(machine["last_updated"])
        if machine["nps"] != 0:
            games_per_minute += (
                (machine["nps"] / 1200000.0) *
                (60.0 / estimate_game_duration(machine["run"]["args"]["tc"])) *
                (int(machine["concurrency"]) //
                 machine["run"]["args"].get("threads", 1)))
    machines.reverse()
    # Get updated results for unfinished runs + finished runs
    (runs, pending_hours, cores,
     nps) = request.rundb.aggregate_unfinished_runs()
    return {
        **get_paginated_finished_runs(request),
        "runs": runs,
        "machines": machines,
        "pending_hours": "%.1f" % (pending_hours),
        "cores": cores,
        "nps": nps,
        "games_per_minute": int(games_per_minute),
    }
Example #4
def build_users(machines, info):
    for machine in machines:
        games_per_hour = (
            (machine["nps"] / 1080000.0)
            * (3600.0 / estimate_game_duration(machine["run"]["args"]["tc"]))
            * (int(machine["concurrency"]) // machine["run"]["args"].get("threads", 1))
        )
        info[machine["username"]]["games_per_hour"] += games_per_hour

    users = []
    for user in info.values():
        try:
            # "last_updated" may still be a string placeholder; in that
            # case fall back to the user's latest task timestamp.
            if isinstance(user["last_updated"], str):
                diff = diff_date(user["task_last_updated"])
            else:
                diff = diff_date(user["last_updated"])
            user["diff"] = diff.total_seconds()
            user["last_updated"] = delta_date(diff)
        except Exception:
            pass
        users.append(user)

    users = [u for u in users if u["games"] > 0 or u["tests"] > 0]
    return users
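
build_users() assumes every machine's username already has an entry in info with these counters initialized. A minimal, hypothetical skeleton of that structure (the field names are taken from the code above; the real records are populated elsewhere, e.g. by process_run() in the examples below):

from collections import defaultdict

# Hypothetical per-user skeleton; real records carry more fields.
info = defaultdict(lambda: {
    "games_per_hour": 0.0,
    "games": 0,
    "tests": 0,
    "cpu_hours": 0.0,
    "last_updated": "Never",  # replaced by a datetime once a task is seen
})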
Example #5
def calc_itp(self, run):
    itp = run["args"]["throughput"]
    if itp < 1:
        itp = 1
    elif itp > 500:
        itp = 500
    itp *= math.sqrt(
        estimate_game_duration(run["args"]["tc"]) / estimate_game_duration("10+0.1")
    )
    itp *= math.sqrt(run["args"]["threads"])
    if "sprt" not in run["args"]:
        itp *= 0.5
    else:
        llr = run["args"]["sprt"].get("llr", 0)
        itp *= (5 + llr) / 5
    run["args"]["itp"] = itp
Example #6
def process_run(run, info, deltas=None):
    # "skip" and "new_deltas" are assumed to be module-level state shared
    # with the caller.
    global skip
    if deltas and (skip or str(run["_id"]) in deltas):
        skip = True
        return
    if deltas is not None and str(run["_id"]) in new_deltas:
        print("Warning: skipping repeated run!")
        return
    if "username" in run["args"]:
        username = run["args"]["username"]
        if username not in info:
            print("not in info: ", username)
            return
        else:
            info[username]["tests"] += 1

    tc = estimate_game_duration(run["args"]["tc"])
    for task in run["tasks"]:
        if "worker_info" not in task:
            continue
        username = task["worker_info"].get("username", None)
        if username == None:
            continue
        if username not in info:
            print("not in info: ", username)
            continue

        if "stats" in task:
            stats = task["stats"]
            num_games = stats["wins"] + stats["losses"] + stats["draws"]
        else:
            num_games = 0

        try:
            info[username]["last_updated"] = max(
                task["last_updated"], info[username]["last_updated"]
            )
            info[username]["task_last_updated"] = max(
                task["last_updated"], info[username]["last_updated"]
            )
        except Exception:
            info[username]["last_updated"] = task["last_updated"]

        info[username]["cpu_hours"] += float(
            num_games * int(run["args"].get("threads", 1)) * tc / (60 * 60)
        )
        info[username]["games"] += num_games
    if deltas is not None:
        new_deltas.update({str(run["_id"]): None})
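
The cpu_hours increment is plain dimensional arithmetic: games times threads times estimated seconds per game, divided by 3600. For instance, with illustrative numbers:

# 500 games on a 4-thread test at an estimated 60 s per game:
num_games, threads, tc = 500, 4, 60.0
print(num_games * threads * tc / (60 * 60))  # -> 33.33... CPU hours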
Example #7
    def worker_cap(self, run, worker_info):
        # Estimate how many games a worker will be able to run
        # during the time interval determined by "self.task_duration".
        # Make sure the result is properly quantized and not zero.

        game_time = estimate_game_duration(run["args"]["tc"])
        concurrency = worker_info["concurrency"] // run["args"]["threads"]
        assert concurrency >= 1
        games = self.task_duration / game_time * concurrency
        if "sprt" in run["args"]:
            batch_size = 2 * run["args"]["sprt"].get("batch_size", 1)
            games = max(batch_size,
                        batch_size * int(games / batch_size + 1 / 2))
        else:
            games = max(2, 2 * int(games / 2 + 1 / 2))
        return games
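
The expression batch_size * int(games / batch_size + 1 / 2) rounds a (non-negative) game count half-up to the nearest multiple of batch_size, and the surrounding max() enforces at least one batch:

batch_size = 16  # e.g. an SPRT batch_size of 8, doubled for game pairs
for raw in (471, 475, 480):
    print(raw, max(batch_size, batch_size * int(raw / batch_size + 1 / 2)))
# 471 -> 464, 475 -> 480, 480 -> 480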
Example #8
def process_run(run, info, deltas=None):
    global skip
    if deltas and (skip or str(run['_id']) in deltas):
        skip = True
        return
    if deltas is not None and str(run['_id']) in new_deltas:
        print('Warning: skipping repeated run!')
        return
    if 'username' in run['args']:
        username = run['args']['username']
        if username not in info:
            print('not in info: ', username)
            return
        else:
            info[username]['tests'] += 1

    tc = estimate_game_duration(run['args']['tc'])
    for task in run['tasks']:
        if 'worker_info' not in task:
            continue
        username = task['worker_info'].get('username')
        if username is None:
            continue
        if username not in info:
            print('not in info: ', username)
            continue

        if 'stats' in task:
            stats = task['stats']
            num_games = stats['wins'] + stats['losses'] + stats['draws']
        else:
            num_games = 0

        try:
            info[username]['last_updated'] = max(
                task['last_updated'], info[username]['last_updated'])
        except Exception:
            info[username]['last_updated'] = task['last_updated']

        info[username]['cpu_hours'] += float(
            num_games * int(run['args'].get('threads', 1)) * tc / (60 * 60))
        info[username]['games'] += num_games
    if deltas is not None:
        new_deltas.update({str(run['_id']): None})
Example #9
    def worker_cap(self, run, worker_info):
        # Estimate how many games a worker will be able to run
        # during the time interval determined by "self.task_duration".
        # Make sure the result is properly quantized and not zero.

        game_time = estimate_game_duration(run["args"]["tc"])
        concurrency = worker_info["concurrency"] // run["args"]["threads"]
        assert concurrency >= 1
        # As the number of completed tasks grows (>1000), hand out longer
        # tasks to avoid accumulating very many tasks in long-running tests.
        scale_duration = 1 + (len(run["tasks"]) // 1000)**2
        games = self.task_duration * scale_duration / game_time * concurrency
        if "sprt" in run["args"]:
            batch_size = 2 * run["args"]["sprt"].get("batch_size", 1)
            games = max(batch_size,
                        batch_size * int(games / batch_size + 1 / 2))
        else:
            games = max(2, 2 * int(games / 2 + 1 / 2))
        return games
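
scale_duration grows quadratically in steps of 1000 completed tasks, so the longest-running tests hand out markedly longer tasks:

for n_tasks in (0, 999, 1000, 2000, 3000):
    print(n_tasks, 1 + (n_tasks // 1000) ** 2)
# 0 -> 1, 999 -> 1, 1000 -> 2, 2000 -> 5, 3000 -> 10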
Example #10
def build_users(machines, info):
    for machine in machines:
        games_per_hour = (machine['nps'] / 1600000.0) * (
            3600.0 / estimate_game_duration(machine['run']['args']['tc'])) * (
                int(machine['concurrency']) //
                machine['run']['args'].get('threads', 1))
        info[machine['username']]['games_per_hour'] += games_per_hour

    users = []
    for user in info.values():
        try:
            user['last_updated'] = delta_date(user['last_updated'])
        except Exception:
            pass
        users.append(user)

    users = [u for u in users if u['games'] > 0 or u['tests'] > 0]
    return users
Example #11
    def sync_request_task(self, worker_info):

        unique_key = worker_info["unique_key"]

        # We get the list of unfinished runs.
        # To limit db access the list is cached for
        # 60 seconds.

        if time.time() > self.task_time + 60:
            if DEBUG:
                print("Refresh queue", flush=True)
            self.task_runs = []
            for r in self.get_unfinished_runs_id():
                run = self.get_run(r["_id"])
                self.sum_cores(run)
                self.calc_itp(run)
                self.task_runs.append(run)
            self.task_time = time.time()

        # We sort the list of unfinished runs according to priority.
        # Note that because of the caching, the properties of the
        # runs may have changed, so resorting is necessary.
        # Changes can be created by the code below or else in update_task().
        # Note that update_task() uses the same objects as here
        # (they are not copies).

        last_run_id = self.worker_runs.get(unique_key,
                                           {}).get("last_run", None)

        def priority(run):  # lower is better
            return (
                -run["args"]["priority"],
                # Try to find a new run for this worker.
                run["_id"] == last_run_id,
                run["cores"] / run["args"]["itp"] * 100.0,
                -run["args"]["itp"],
                run["_id"],
            )

        self.task_runs.sort(key=priority)

        # We go through the list of unfinished runs to see if the worker
        # has reached the number of allowed connections from the same ip
        # address.

        connections = 0
        for run in self.task_runs:
            for task in run["tasks"]:
                if (task["active"] and task["worker_info"]["remote_addr"]
                        == worker_info["remote_addr"]):
                    connections = connections + 1

        if connections >= self.userdb.get_machine_limit(
                worker_info["username"]):
            return {"task_waiting": False, "hit_machine_limit": True}

        # Collect some data about the worker that will be used below.

        # Memory
        max_threads = int(worker_info["concurrency"])
        min_threads = int(worker_info.get("min_threads", 1))
        max_memory = int(worker_info.get("max_memory", 0))

        # Is the worker near the github api limit?
        if "rate" in worker_info:
            rate = worker_info["rate"]
            near_github_api_limit = rate["remaining"] <= 2 * math.sqrt(
                rate["limit"])
        else:
            near_github_api_limit = False

        # Now go through the sorted list of unfinished runs.
        # We will add a task to the first run that is suitable.

        run_found = False

        for run in self.task_runs:
            if run["finished"]:
                continue

            if not run["approved"]:
                continue

            if run["args"]["threads"] > max_threads:
                continue

            if run["args"]["threads"] < min_threads:
                continue

            # Check if there aren't already enough workers
            # working on this run.
            committed_games = 0
            for task in run["tasks"]:
                if not task["active"]:
                    if "stats" in task:
                        stats = task["stats"]
                        committed_games += (stats["wins"] + stats["losses"] +
                                            stats["draws"])
                else:
                    committed_games += task["num_games"]

            remaining = run["args"]["num_games"] - committed_games
            if remaining <= 0:
                continue

            # We check if the worker has reserved enough memory
            need_tt = 0
            need_base = 0
            if max_memory > 0:

                def get_hash(s):
                    h = re.search("Hash=([0-9]+)", s)
                    if h:
                        return int(h.group(1))
                    return 0

                need_tt += get_hash(run["args"]["new_options"])
                need_tt += get_hash(run["args"]["base_options"])
                need_tt *= max_threads // run["args"]["threads"]
                # Estimate another 70 MB per engine process for the net
                # (about 40 MB) and other overhead besides hash.
                need_base = 2 * 70 * (max_threads // run["args"]["threads"])

            if need_base + need_tt > max_memory:
                continue

            # Github API limit...
            if near_github_api_limit:
                have_binary = (unique_key in self.worker_runs
                               and run["_id"] in self.worker_runs[unique_key])
                if not have_binary:
                    continue

            # To avoid time losses in the case of large concurrency and short TC,
            # probably due to cutechess-cli as discussed in issue #822,
            # assign linux workers to LTC or multi-threaded jobs
            # and windows workers only to LTC jobs
            if max_threads >= 32:
                if "windows" in worker_info["uname"].lower():
                    short_tc = estimate_game_duration(
                        run["args"]["tc"]) <= estimate_game_duration("55+0.5")
                else:
                    short_tc = estimate_game_duration(run["args"]["tc"]) * run[
                        "args"]["threads"] <= estimate_game_duration("30+0.3")
                if short_tc:
                    continue

            # Limit the number of cores.
            # Currently this is only done for spsa.
            if "spsa" in run["args"]:
                limit_cores = 40000 / math.sqrt(
                    len(run["args"]["spsa"]["params"]))
            else:
                limit_cores = 1000000  # infinity

            cores = 0
            core_limit_reached = False
            for task in run["tasks"]:
                if task["active"]:
                    cores += task["worker_info"]["concurrency"]
                    if cores > limit_cores:
                        core_limit_reached = True
                        break

            if core_limit_reached:
                continue

            # If we make it here, it means we have found a run
            # suitable for a new task.
            run_found = True
            break

        # If there is no suitable run, tell the worker.
        if not run_found:
            return {"task_waiting": False}

        # Now we create a new task for this run.
        opening_offset = 0
        for task in run["tasks"]:
            opening_offset += task["num_games"]

        task_size = min(self.worker_cap(run, worker_info), remaining)
        task = {
            "num_games": task_size,
            "active": True,
            "worker_info": worker_info,
            "last_updated": datetime.utcnow(),
            "start": opening_offset,
            "stats": {
                "wins": 0,
                "losses": 0,
                "draws": 0,
                "pentanomial": 5 * [0]
            },
        }
        run["tasks"].append(task)

        task_id = len(run["tasks"]) - 1

        run["cores"] += task["worker_info"]["concurrency"]
        self.buffer(run, False)

        # Cache some data. Currently we record the id's
        # the worker has seen, as well as the last id that was seen.
        # Note that "worker_runs" is empty after a server restart.

        if unique_key not in self.worker_runs:
            self.worker_runs[unique_key] = {}

        if run["_id"] not in self.worker_runs[unique_key]:
            self.worker_runs[unique_key][run["_id"]] = True

        self.worker_runs[unique_key]["last_run"] = run["_id"]

        if DEBUG:
            print(
                "Allocate run: https://tests.stockfishchess.org/tests/view/{} task_id: {} to {}/{} Stats: {}"
                .format(
                    run["_id"],
                    task_id,
                    worker_info["username"],
                    unique_key,
                    run["tasks"][task_id]["stats"],
                ),
                flush=True,
            )
        return {"run": run, "task_id": task_id}
Example #12
    def sync_request_task(self, worker_info):
        if time.time() > self.task_time + 60:
            self.task_runs = []
            for r in self.get_unfinished_runs_id():
                run = self.get_run(r["_id"])
                self.sum_cores(run)
                self.calc_itp(run)
                self.task_runs.append(run)
            self.task_runs.sort(key=lambda r: (
                -r["args"]["priority"],
                r["cores"] / r["args"]["itp"] * 100.0,
                -r["args"]["itp"],
                r["_id"],
            ))
            self.task_time = time.time()

        max_threads = int(worker_info["concurrency"])
        min_threads = int(worker_info.get("min_threads", 1))
        max_memory = int(worker_info.get("max_memory", 0))

        # Before allocating a new task, count the worker's active connections;
        # only a limited number of connections per IP address is allowed.
        connections = 0
        for run in self.task_runs:
            for task in run["tasks"]:
                if (task["active"] and task["worker_info"]["remote_addr"]
                        == worker_info["remote_addr"]):
                    connections = connections + 1

        # Allow a few connections, for multiple computers on same IP
        if connections >= self.userdb.get_machine_limit(
                worker_info["username"]):
            return {"task_waiting": False, "hit_machine_limit": True}

        # Limit worker Github API calls
        if "rate" in worker_info:
            rate = worker_info["rate"]
            limit = rate["remaining"] <= 2 * math.sqrt(rate["limit"])
        else:
            limit = False
        worker_key = worker_info["unique_key"]

        # Get a new task that matches the worker requirements
        run_found = False
        for run in self.task_runs:
            # compute required TT memory
            need_tt = 0
            if max_memory > 0:

                def get_hash(s):
                    h = re.search("Hash=([0-9]+)", s)
                    if h:
                        return int(h.group(1))
                    return 0

                need_tt += get_hash(run["args"]["new_options"])
                need_tt += get_hash(run["args"]["base_options"])
                need_tt *= max_threads // run["args"]["threads"]

            if (run["approved"] and
                (not limit or (worker_key in self.worker_runs
                               and run["_id"] in self.worker_runs[worker_key]))
                    and run["args"]["threads"] <= max_threads
                    and run["args"]["threads"] >= min_threads
                    and need_tt <= max_memory
                    # To avoid time losses in the case of large concurrency and short TC,
                    # probably due to cutechess-cli as discussed in issue #822,
                    # assign those workers to LTC or multi-threaded jobs.
                    and
                (max_threads < 32
                 or estimate_game_duration(run["args"]["tc"]) *
                 run["args"]["threads"] > estimate_game_duration("30+0.3"))):
                task_id = -1
                cores = 0
                if "spsa" in run["args"]:
                    limit_cores = 40000 / math.sqrt(
                        len(run["args"]["spsa"]["params"]))
                else:
                    limit_cores = 1000000  # effectively unlimited for non-SPSA runs
                for task in run["tasks"]:
                    if task["active"]:
                        cores += task["worker_info"]["concurrency"]
                        if cores > limit_cores:
                            break
                    task_id = task_id + 1
                    if not task["active"] and task["pending"]:
                        task["worker_info"] = worker_info
                        task["last_updated"] = datetime.utcnow()
                        task["active"] = True
                        run_found = True
                        break
            if run_found:
                break

        if not run_found:
            return {"task_waiting": False}

        self.sum_cores(run)
        self.task_runs.sort(key=lambda r: (
            -r["args"]["priority"],
            r["cores"] / r["args"]["itp"] * 100.0,
            -r["args"]["itp"],
            r["_id"],
        ))

        self.buffer(run, False)

        # Update worker_runs (compiled tests)
        if worker_key not in self.worker_runs:
            self.worker_runs[worker_key] = {}
        if run["_id"] not in self.worker_runs[worker_key]:
            self.worker_runs[worker_key][run["_id"]] = True

        if "stats" not in run["tasks"][task_id]:
            run["tasks"][task_id]["stats"] = {
                "wins": 0,
                "losses": 0,
                "draws": 0,
                "pentanomial": 5 * [0],
            }
        return {"run": run, "task_id": task_id}