Beispiel #1
0
    def store_results(self, measure: "Measure", player_name: Optional[str],
                      results: Dict) -> None:
        """Record ``results`` in ``self.stored_data`` under ``measure.store_as``.

        Args:
            measure: the measure whose ``store_as`` attribute is the storage key.
            player_name: when truthy, results are stored per player at
                ``stored_data[key][player_name]``; when falsy (the
                store_singleton case) they replace ``stored_data[key]`` itself.
            results: the value to store.

        An info message is logged whenever an existing, different value is
        about to be overwritten; a debug message is logged after every store.
        """
        store_key = measure.store_as
        # make sure a container exists for this key before looking inside it
        self.stored_data.setdefault(store_key, {})

        if not player_name:  # i.e. store_singleton
            if (self.stored_data[store_key]
                    and results != self.stored_data[store_key]):
                msg = (f"Overwriting stored data key {store_key}. Previous"
                       f" value:\n{self.stored_data[store_key]}")
                logger.info(gudlog(msg, self))

            self.stored_data[store_key] = results

            msg = f"Stored {store_key}:\n{results}"
            logger.debug(gudlog(msg, self))
            return

        per_player = self.stored_data[store_key]
        if player_name in per_player and results != per_player[player_name]:
            msg = (
                f"Overwriting stored data key {store_key}[{player_name}]. Previous"
                f" value:\n{per_player[player_name]}")
            logger.info(gudlog(msg, self))

        per_player[player_name] = results

        msg = f"Stored {store_key}[{player_name}]:\n{results}"
        logger.debug(gudlog(msg, self))
Beispiel #2
0
    async def play(self) -> None:
        """Prepare this score for playing, then poll it until it has finished.

        Preparation: conductor-local measures are prepended, every non-local
        measure gets a 'tune_orchestra' dependency, the conductor "all hosts"
        player is created if missing and pointed at every known host, and the
        johann tarball is built. Afterwards the state is evaluated and measures
        are queued once per second until ``self.finished`` is set.
        """
        # add local measures (skipping any that are already there)
        for measure_dict in conductor_local_measure_dicts:
            local_name = measure_dict["name"]
            existing_names = [existing.name for existing in self.measures]
            if local_name in existing_names:
                continue
            local_measure: "Measure" = MeasureSchema().load(measure_dict)
            local_measure.local_measure = True
            self.measures.insert(0, local_measure)

        # add local measure dependencies
        local_names = [entry["name"] for entry in conductor_local_measure_dicts]
        for measure in self.measures:
            needs_tuning_dependency = (
                measure.name not in local_names
                and "tune_orchestra" not in measure.depends_on
                and measure.name != "tune_orchestra")
            if needs_tuning_dependency:
                msg = "adding 'tune_orchestra' dependency"
                logger.debug(gudlog(msg, self, None, measure.name))
                measure.depends_on.insert(0, "tune_orchestra")

        # add conductor allhosts player
        allhosts_name = config.CONDUCTOR_ALLHOSTS_PLAYER_NAME
        if allhosts_name not in self.players:
            local_player: "Player" = PlayerSchema().load({"name": allhosts_name})
            self.players[allhosts_name] = local_player

        # conductor player will operate over all hosts, including those not created yet
        all_hosts, err_msgs = self.get_all_hosts()
        for err_msg in err_msgs:
            logger.warning(gudlog(err_msg, self))
        self.players[allhosts_name].hostnames = [host.name for host in all_hosts]

        # make johann tarball in prep for tuning
        create_johann_tarball()

        self.state = TaskState.STARTED
        self.started_at = datetime.utcnow()

        # shallow copy: smart_queue() removes entries as measures get queued
        pending_measures = copy.copy(self.measures)
        self.evaluate_state()
        while not self.finished:
            self.smart_queue(pending_measures)
            await asyncio.sleep(1)
            self.evaluate_state()
Beispiel #3
0
    def copy_from(self, player: "Player",
                  score: "Score") -> Tuple[Optional[bool], Optional[str]]:
        """Copy the name, hostnames, scale and image of ``player`` onto self.

        Args:
            player: the Player to copy from.
            score: passed through to ``gudlog`` for the debug messages.

        Returns:
            A ``(status, message)`` tuple:
            ``(None, None)`` if hostnames, scale and image were all unchanged,
            ``(True, None)`` if at least one of them was updated,
            ``(False, "not a Player object")`` if player is not a Player object.
            A differing name is copied but does not count as a change.
        """
        if not isinstance(player, Player):
            return False, "not a Player object"

        if player.name != self.name:
            logger.warning(
                "copy_from() called with a player with a different name...was this"
                " intentional?")
        self.name = player.name

        # (attribute, debug message template) for every attribute we sync
        tracked = (
            ("hostnames", "updating hostnames from {} to {}"),
            ("scale", "updating scale from {} to {}"),
            ("image", "updating image from {} to {}"),
        )

        changed = False
        for attr, template in tracked:
            incoming = getattr(player, attr)
            current = getattr(self, attr)
            if incoming == current:
                continue
            changed = True
            logger.debug(gudlog(template.format(current, incoming), score, self))
            setattr(self, attr, incoming)

        return (True, None) if changed else (None, None)
Beispiel #4
0
    def evaluate_state(self, score: "Score") -> None:
        """Refresh this measure's state, status and finished flag from its tasks.

        The per-player task status is fetched via ``get_task_status``, folded
        into ``self.state`` / ``self.status`` / ``self.finished``, a warning is
        logged if the measure has just transitioned to FAILURE, and finally the
        results are handed to ``self.store_results``.
        """
        task_status = self.get_task_status(short=False)
        state_before = self.state

        finished_count = 0
        success_count = 0
        for player_name, player_stat in task_status.items():
            if player_stat["finished"]:
                finished_count += 1

            player_state = player_stat["state"]
            if player_state == TaskState.SUCCESS:
                success_count += 1

            # adopt the highest-priority state reported by any player
            if task_state_priority(player_state) > task_state_priority(self.state):
                self.state = player_state

            player_message = player_stat["status"]
            if player_message:
                self.status[player_name] = player_message

        # a failed measure is done as soon as everything that was reported is done
        if self.state == TaskState.FAILURE and finished_count == len(task_status):
            self.finished = True

        expected_players = len(self.player_names)
        if success_count == expected_players:
            self.state = TaskState.SUCCESS
        if finished_count == expected_players:
            self.finished = True

        newly_failed = (self.state == TaskState.FAILURE
                        and self.state != state_before)
        if newly_failed:
            msg = f"measure '{self.name}' failed:\n{json.dumps(self.status, indent=2)}"
            logger.warning(gudlog(msg, score))

        self.store_results(score, task_status)
Beispiel #5
0
    def enqueue(
            self, score: "Score", measure: "Measure", func: str, delay: int,
            *args: Any) -> Tuple[bool, Optional[str], Optional["GroupResult"]]:
        """Build one task signature per host of this player and launch them.

        Args:
            score: the score being played; its ``task_map`` gets an entry for
                every task signature that is created.
            measure: the measure the tasks belong to.
            func: task name handed to the signature factory.
            delay: delay handed to the signature factory.
            *args: positional arguments for the task.

        Returns:
            ``(True, None, group_result)`` once the group has been applied, or
            ``(False, error_message, None)`` as soon as any host is unknown,
            still pending creation, or yields no signature.
            NOTE(review): on an early failure, ``score.task_map`` and
            ``host.celery_task_ids`` entries recorded for earlier hosts in the
            loop are not rolled back.
        """
        signatures = []

        for hostname in self.hostnames:
            if hostname not in hosts:
                msg = f"{hostname} not found in dictionary of hosts"
                logger.warning(gudlog(msg, score, self, measure.name))
                return False, msg, None
            host = hosts[hostname]

            is_local = measure.local_measure
            if is_local:
                sig = Player.get_local_task_signature(score.name, measure.name,
                                                      host, func, delay, *args)
            else:
                if host.pending_create:
                    msg = f"{host.name} still pending creation"
                    logger.warning(gudlog(msg, score, self, measure.name))
                    return False, msg, None

                sig = host.get_task_signature(score.name, self.name,
                                              measure.name, func, delay, *args)

            # validate the signature *before* anything dereferences sig.id
            if sig is None:
                msg = f"task signature creation failed for hostname {host.name}"
                logger.warning(gudlog(msg, score, self, measure.name))
                return False, msg, None

            if not is_local:
                host.clear_finished_celery_task_ids()
                host.celery_task_ids.append(sig.id)

            score.task_map[sig.id] = {
                "measure_name": measure.name,
                "player_name": self.name,
                "host_name": host.name,
            }
            signatures.append(sig)

        task_group = group(signatures)
        group_result = task_group.apply_async()

        return True, None, group_result
Beispiel #6
0
    async def play_the_player(self, measure: "Measure",
                              player: "Player") -> bool:
        """Resolve a measure's special arguments and enqueue it for one player.

        Args:
            measure: the measure to queue; on failure its ``state`` is set to
                FAILURE and an explanation is written into its ``status``.
            player: the player whose ``enqueue`` is called.

        Returns:
            True if the player's tasks were enqueued (the group task is then
            recorded in ``measure.celery_group_tasks``), False otherwise.
        """
        delay = measure.start_delay

        # handle special arguments like random numbers and stored values
        try:
            if isinstance(delay, str):
                delay = transform_arg(self, measure, player, delay)
                # explicit check rather than `assert`, which is stripped under -O
                if not isinstance(delay, int):
                    raise ValueError(
                        f"start_delay must resolve to an int, got {delay!r}")
            new_args = transform_args(self, measure, player, measure.args)
        except (AssertionError, KeyError, ValueError):
            msg = gudexc(
                "Failed to queue measure -- bad special argument(s)",
                self,
                player,
                measure,
            )
            logger.error(msg)
            measure.state = TaskState.FAILURE
            # we can't set measure.finished yet; other players for this measure may be running
            measure.status["all"]["all"] = f"{msg}; see logs for details"
            return False

        msg = f"queueing measure {measure.name} with a delay of {delay} seconds"
        logger.info(gudlog(msg, self, player))

        msg = f"(transformed) args:\n{pprint.pformat(new_args, indent=4)}"
        logger.debug(gudlog(msg, self, player, measure))

        success, err_msg, group_task = player.enqueue(self, measure,
                                                      measure.task_name, delay,
                                                      *new_args)
        if not success:
            msg = f"failed to play measure {measure.name}: {err_msg}"
            logger.warning(gudlog(msg, self, player))
            measure.state = TaskState.FAILURE
            measure.status[player.name]["all"] = msg
            return False

        measure.celery_group_tasks[player.name] = group_task
        # report success explicitly; falling off the end would return a falsy None
        return True
Beispiel #7
0
    def get_task_signature(
        self,
        score_name: str,
        player_name: str,
        measure_name: str,
        func: str,
        delay: int,
        *task_args,
        **task_kwargs,
    ) -> "Signature":
        """Create a task signature for ``func`` routed to this host's queue.

        The signature options use this host's name as the queue, a
        ``score.player.host.measure`` description as the shadow name, a freshly
        generated task id, and ``delay`` as the countdown. A warning is logged
        (but the signature is still created) if the host is tuning.

        Returns:
            The signature built from ``func``, ``task_args`` and ``task_kwargs``.
        """
        if self.tuning:
            logger.warning(
                gudlog(
                    "host is tuning or pending tuning; strongly advise against running new"
                    " tasks on it or they may be interrupted without warning or recovery",
                    score_name, player_name, None, self.name))

        # we need to know the task_id a priori for score.task_map
        options = dict(
            queue=self.name,
            shadow=f"{score_name}.{player_name}.{self.name}.{measure_name}",
            task_id=str(uuid4()),
            countdown=delay,
        )
        task_signature = signature(func,
                                   args=task_args,
                                   kwargs=task_kwargs,
                                   options=options)

        msg = (f"task signature created for {func}{task_args} with"
               f" kwargs:\n{task_kwargs}\nand options:\n{options}")
        # numeric level 5 sits below logging.DEBUG (10)
        logger.log(
            5, gudlog(msg, score_name, player_name, measure_name, self.name))

        return task_signature
Beispiel #8
0
    def smart_queue(self,
                    unqueued_measures: List["Measure"]) -> Optional["Measure"]:
        """Queue at most one measure whose dependencies are satisfied.

        Measures are scanned in order. The first one with no dependencies, or
        whose dependencies have all finished without a blocking failure, is
        queued. Measures with a failed dependency (and no ``dependency_proof``)
        are marked FAILURE/finished; measures with an unfinished dependency are
        marked DEFERRED.

        Args:
            unqueued_measures: measures still waiting to be queued. This list is
                mutated: the queued measure and any dependency-failed measures
                are removed from it.

        Returns:
            The measure that was queued, or None if none was ready.
        """
        to_queue = (
            None  # queue one at a time to ensure we catch any dependency failures
        )
        dependency_failed = []
        for m in unqueued_measures:
            # no dependencies
            if not m.depends_on:
                to_queue = m
                break

            # make sure dependencies finished before queueing
            # also if any dependencies failed, dependent measures fail too
            ready_to_queue = True  # until/unless False below

            for dep_name in m.depends_on:
                depm = self.get_measure(dep_name)
                if not depm.finished:
                    ready_to_queue = False
                    m.state = TaskState.DEFERRED

                if depm.state == TaskState.FAILURE and not m.dependency_proof:
                    ready_to_queue = False
                    m.state = TaskState.FAILURE
                    m.finished = True
                    m.status["all"][
                        "all"] = f"dependency failed ({dep_name})"
                    dependency_failed.append(m)
                    # the failure is final: stop here so that a later unfinished
                    # dependency cannot reset the state to DEFERRED and so the
                    # measure is only recorded (and warned about) once
                    break

            if ready_to_queue:
                to_queue = m
                break

        if to_queue is not None:
            self.queue_measure(to_queue)
            if to_queue in unqueued_measures:
                unqueued_measures.remove(to_queue)

        for to_remove in dependency_failed:
            msg = (
                f"measure '{to_remove.name}' failed because one or more of its"
                " dependencies failed")
            logger.warning(gudlog(msg, self))
            if to_remove in unqueued_measures:
                unqueued_measures.remove(to_remove)

        return to_queue
Beispiel #9
0
    def store_results(
        self, score: "Score", task_status: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Collect task results per player and hand them to the score for storage.

        For each task, the final ``result`` is used if present; otherwise, when
        ``self.store_interim_results`` is set, a non-empty ``meta`` entry's
        ``interim_result`` is used. Results are keyed by host name when the task
        id is in ``score.task_map``, else by the task id itself.

        With ``self.store_singleton`` every individual result is passed to
        ``score.store_results`` with no player name; otherwise each player's
        non-empty result dict is passed with that player's name.

        Returns:
            None if ``self.store_as`` is falsy, else a dict mapping each player
            name to its collected results.
        """
        if not self.store_as:
            return None

        collected = {}
        for player_name, player_stat in task_status.items():
            player_results = {}
            for task_id, task in player_stat["tasks"].items():
                if "result" in task:
                    value = task["result"]
                elif (self.store_interim_results and task.get("meta")
                      and "interim_result" in task["meta"]):
                    value = task["meta"]["interim_result"]
                else:
                    value = None

                if value is None:
                    continue

                if self.store_singleton:
                    score.store_results(self, None, value)

                if task_id in score.task_map:
                    host_name = score.task_map[task_id]["host_name"]
                    player_results[host_name] = value
                else:
                    msg = (
                        f"task {task_id} not found in task_map. This is probably bad."
                    )
                    logger.info(gudlog(msg, score, player_name, self))
                    player_results[task_id] = value

            collected[player_name] = player_results

            if player_results and not self.store_singleton:
                score.store_results(self, player_name, player_results)

        return collected
Beispiel #10
0
    def evaluate_state(self) -> None:
        """Re-evaluate every unfinished measure and roll the results up.

        Updates ``self.state`` to the highest-priority measure state (or to
        SUCCESS when every measure succeeded), mirrors each measure's status
        into ``self.status`` with task ids replaced by host names where known,
        and marks the score finished once no measure is unfinished.
        """
        # make sure we are current on measure state
        for measure in self.measures:
            if not measure.finished:
                measure.evaluate_state(self)

        unfinished_count = 0
        not_success_count = 0
        for measure in self.measures:
            if not measure.finished:
                unfinished_count += 1
            if measure.state is not TaskState.SUCCESS:
                not_success_count += 1

            if task_state_priority(measure.state) > task_state_priority(self.state):
                self.state = measure.state

            if not measure.status:
                continue

            # replace task id with host name if known
            translated = copy.copy(measure.status)
            for player_name, player_status in measure.status.items():
                translated[player_name] = {
                    (self.task_map[task_id]["host_name"]
                     if task_id in self.task_map else task_id): status
                    for task_id, status in player_status.items()
                }
            self.status[measure.name] = translated

        if not_success_count == 0:
            self.state = TaskState.SUCCESS

        if unfinished_count == 0:
            self.finished = True
            self.finished_at = datetime.utcnow()
            msg = f"finished with state {self.state}"
            logger.info(gudlog(msg, self))
Beispiel #11
0
    def validate_create_host_mappings(self) -> Tuple[bool, List[str]]:
        """Validate each player's host mapping and register/flag its hosts.

        For every player except the conductor "all hosts" player this checks
        that the number of mapped hosts matches the player's scale (unless
        ``self.create_hosts`` is set), and for every mapped host that:

        * a Host object exists or can be created, with a non-conflicting image,
        * the host is not tuning,
        * its control method is registered and any required external Redis is
          configured,
        * a host control class can be resolved, and
        * the host is confirmed to exist (recent confirmations are reused).

        Confirmed hosts that are not yet in ``hosts`` are added; unconfirmed
        hosts are marked ``pending_create`` when ``self.create_hosts`` is set
        and reported as errors otherwise.

        Returns:
            ``(success, err_msgs)``: ``success`` is False if any check failed;
            ``err_msgs`` holds the message of each failed check.
        """
        success = True
        err_msgs = []

        # map missing (to be created) hostnames to players
        if self.create_hosts:
            self.map_missing_hosts(get_host_names())

        for p in self.players.values():
            # skip local player
            if p.name == config.CONDUCTOR_ALLHOSTS_PLAYER_NAME:
                continue

            # validate hostnames length
            if p.hostnames == [] and not self.create_hosts:
                msg = f"{p.name}: no hosts mapped"
                if p.scale == 0 and config.ALLOW_EMPTY_PLAYER_HOSTS:
                    logger.debug(gudlog(msg, self))
                else:
                    success = False
                    logger.warning(gudlog(msg, self))
                    err_msgs.append(msg)
                    continue
            elif len(p.hostnames) != p.scale and not self.create_hosts:
                success = False
                msg = f"{p.name}: length of hosts does not match scale ({p.scale})"
                logger.warning(gudlog(msg, self))
                err_msgs.append(msg)
                continue

            for host_name in p.hostnames:
                if host_name not in hosts:
                    # create host obj
                    host_dict = {
                        "hostname": host_name,
                        "image": p.image,
                    }
                    try:
                        msg = (
                            f"Temporarily creating Host object for {host_name} with"
                            f" image {p.image}")
                        logger.debug(gudlog(msg, self, p))
                        host_obj = HostSchema().load(host_dict)
                    except MarshmallowValidationError:
                        success = False
                        msg = f"{host_name}: error creating Host object"
                        logger.warning(gudlog(msg, self, p))
                        err_msgs.append(msg)
                        continue
                elif hosts[host_name].get_image() not in [None, p.image]:
                    success = False
                    msg = (
                        f"{host_name}: Host object already exists with conflicting"
                        " image")
                    logger.warning(gudlog(msg, self, p))
                    err_msgs.append(msg)
                    continue
                elif hosts[host_name].tuning:
                    success = False
                    msg = (
                        f"host '{host_name}' is tuning or pending tuning; try again"
                        " soon")
                    logger.warning(gudlog(msg, self, p))
                    err_msgs.append(msg)
                    continue
                else:
                    host_obj = hosts[host_name]

                # make sure we have externally-accessible Redis if needed
                if host_obj.control_method not in config.HOST_CONTROL_EXTERNAL_REDIS:
                    success = False
                    msg = (
                        f"{host_name}: control_method '{host_obj.control_method}'"
                        " is not properly registered in"
                        " config.HOST_CONTROL_EXTERNAL_REDIS")
                    logger.warning(gudlog(msg, self, p))
                    err_msgs.append(msg)
                    continue
                elif (not config.REDIS_HOST_EXTERNAL
                      and config.HOST_CONTROL_EXTERNAL_REDIS[
                          host_obj.control_method]):
                    success = False
                    msg = (
                        f"host ('{host_name}') requires externally-accessible Redis,"
                        " but environment variable REDIS_HOST_EXTERNAL is not"
                        " specified -- it is usually easiest to include this in the"
                        " conductor's"
                        f" '{config.SRC_ROOT.joinpath(config.ENV_FILE)}'")
                    logger.warning(gudlog(msg, self, p))
                    err_msgs.append(msg)
                    continue

                # get HostControl object for this Host
                if host_obj.control_method.upper() == "DOCKER":
                    host_control_class = DockerHostControl
                else:
                    host_control_class, msg = get_host_control_class(
                        host_obj.control_method)
                    if not host_control_class:
                        success = False
                        msg = f"{host_name}: {msg}"
                        logger.warning(gudlog(msg, self, p))
                        err_msgs.append(msg)
                        continue

                # check if the host was recently confirmed to be turned on
                host_recently_confirmed_on = False
                if host_obj.last_confirmed_on:
                    # timedelta.seconds is only the sub-day remainder, so a
                    # confirmation from days ago could look fresh; use the full
                    # elapsed time instead (truncated to int to keep the log
                    # message format)
                    check_age = int((datetime.utcnow() -
                                     host_obj.last_confirmed_on).total_seconds())
                    # a negative age (timestamp in the future) is not trusted
                    if 0 <= check_age < config.HOST_CONFIRMED_ON_VALID_SECS:
                        host_recently_confirmed_on = True
                        msg = (
                            f"Host '{host_name}' with control_name"
                            f" '{host_obj.control_name}' confirmed to be on via"
                            f" {host_obj.control_method} {check_age}s ago; not checking"
                            " again")
                        logger.debug(msg)

                host_confirmed_on = False
                if not host_recently_confirmed_on:
                    control_name = host_obj.control_name or host_obj.name
                    host_confirmed_on = host_control_class.host_exists(
                        control_name)
                    if host_confirmed_on:
                        host_obj.last_confirmed_on = datetime.utcnow()
                        msg = (
                            f"Host '{host_name}' with control_name"
                            f" '{host_obj.control_name}' appears to exist via"
                            f" {host_obj.control_method}")
                        logger.debug(msg)

                if host_recently_confirmed_on or host_confirmed_on:
                    if (
                            host_name not in hosts
                    ):  # we may have created host_obj above and not yet in config.hosts
                        hosts[host_name] = host_obj
                        msg = (
                            f"Added new Host object for {host_name} with image"
                            f" {p.image}")
                        logger.info(gudlog(msg, self, p))
                elif not self.create_hosts:
                    success = False
                    msg = (
                        f"host '{host_name}' with control_name"
                        f" '{host_obj.control_name}' not found via (possibly default)"
                        f" control method '{host_obj.control_method}'")
                    logger.warning(gudlog(msg, self, p))
                    err_msgs.append(msg)
                    continue
                else:
                    # mark hosts that need to be created
                    # NOTE(review): this indexes `hosts` directly; if host_obj was
                    # only created temporarily above, this relies on the host
                    # having been registered elsewhere (presumably by
                    # map_missing_hosts) -- confirm, otherwise this raises KeyError
                    hosts[host_name].pending_create = True
                    msg = f"host {host_name} marked for creation"
                    logger.debug(gudlog(msg, self, p))

        return success, err_msgs