Beispiel #1
0
    def test_monitor_run_debouncing_non_terminal_state(self, db: Session,
                                                       client: TestClient):
        """Exercise the monitoring debounce window against a non-terminal pod.

        The run is stored as completed while its pod is still running. A
        fresh ``last_update`` must leave the stored state untouched; a stale
        one must let monitoring move the run to running, and finally to
        completed once the pod itself completes (logs are collected then).
        """
        # a non-zero monitoring interval is what activates debouncing
        config.runs_monitoring_interval = 100

        def persist_run(last_update):
            # write the in-memory run, stamped with the given update time
            self.run["status"]["last_update"] = last_update.isoformat()
            get_db().store_run(db, self.run, self.run_uid, self.project)

        def run_monitor_cycle():
            self.runtime_handler.monitor_runs(get_db(), db)

        def expect_state(expected_state):
            self._assert_run_reached_state(db, self.project, self.run_uid,
                                           expected_state)

        # the SDK side already reported a terminal state, just now
        self.run["status"]["state"] = RunStates.completed
        persist_run(now_date())

        # ... while the pod is still in a non-terminal state
        self._mock_list_namespaced_pods([[self.running_pod]])
        run_monitor_cycle()

        # debounced: the stored state was not overridden
        expect_state(RunStates.completed)

        # push the last update back, well outside the debounce period
        debounce_period = config.runs_monitoring_interval
        stale_update = now_date() - timedelta(
            seconds=float(2 * debounce_period))
        persist_run(stale_update)

        # the pod is still in a non-terminal state
        self._mock_list_namespaced_pods([[self.running_pod]])
        run_monitor_cycle()

        # not debounced: the run follows the pod state
        expect_state(RunStates.running)

        # pod reaches a terminal state (extra listing for the log collection)
        self._mock_list_namespaced_pods([[self.completed_pod],
                                         [self.completed_pod]])
        log = self._mock_read_namespaced_pod_log()
        run_monitor_cycle()

        # not debounced: the run is completed and its logs were collected
        expect_state(RunStates.completed)
        self._assert_run_logs(
            db,
            self.project,
            self.run_uid,
            log,
            self.completed_pod.metadata.name,
        )
Beispiel #2
0
 def _store_run_fixture(self, db: Session):
     """Create ``self.run`` in the ``created`` state and store it in the DB."""
     run_status = {
         "state": RunStates.created,
         "last_update": now_date().isoformat(),
     }
     run_metadata = {"project": self.project, "uid": self.run_uid}
     self.run = {"status": run_status, "metadata": run_metadata}
     get_db().store_run(db, self.run, self.run_uid, self.project)
Beispiel #3
0
 def update_resource_status(self, status="", producer=None):
     """Refresh the data target record and report it on the resource status.

     :param status:   new target status; when empty the current target
                      status is kept, falling back to "created"
     :param producer: new producer; when empty the current one is kept
     """
     # lazily create the target record on first use
     current_target = self._target or DataTarget(
         self.kind, self.name, self._target_path)
     self._target = current_target

     current_target.status = status or current_target.status or "created"
     current_target.updated = now_date().isoformat()
     current_target.producer = producer or current_target.producer
     self._resource.status.update_target(current_target)
Beispiel #4
0
    def get_log(
        db_session: Session,
        project: str,
        uid: str,
        size: int = -1,
        offset: int = 0,
        source: LogSources = LogSources.AUTO,
    ):
        """Fetch the log of a run from its log file or from its logger pod.

        The log file is used when it exists and ``source`` is AUTO or
        PERSISTENCY. Otherwise, when ``source`` is AUTO or K8S, the run is
        read from the DB and the log is taken from the first logger pod
        found for it; while doing so, a run stored as "running" is updated
        in the DB when its pod failed, succeeded or could not be found.

        :param db_session: DB session passed through to the DB layer
        :param project:    project name
        :param uid:        run uid
        :param size:       number of bytes to read from the log file
                           (only applied to the log file)
        :param offset:     byte offset to start from
        :param source:     where the log may be taken from
        :return: tuple of (log bytes, status); status is "" for the log
                 file, the lower-cased status reported for the pod, the run
                 state / "failed" when no pod is used, or None when no
                 source was applicable
        """
        log_file = log_path(project, uid)

        # persisted log: served straight from the file
        if log_file.exists() and source in (LogSources.AUTO,
                                            LogSources.PERSISTENCY):
            with log_file.open("rb") as log_fp:
                log_fp.seek(offset)
                content = log_fp.read(size)
            return content, ""

        if source not in (LogSources.AUTO, LogSources.K8S):
            return b"", None

        run = get_db().read_run(db_session, uid, project)
        if not run:
            log_and_raise(HTTPStatus.NOT_FOUND, project=project, uid=uid)

        status = get_in(run, "status.state", "")
        if not get_k8s():
            return b"", status

        pods = get_k8s().get_logger_pods(uid)
        if not pods:
            # a run marked as running without any pod is flagged as an error
            if status == "running":
                update_in(run, "status.state", "error")
                update_in(run, "status.error",
                          "pod not found, maybe terminated")
                get_db().store_run(db_session, run, uid, project)
                status = "failed"
            return b"", status

        # only the first reported pod is considered
        pod, pod_status = next(iter(pods.items()))
        pod_status = pod_status.lower()
        content = b""

        # TODO: handle in cron/tracking
        if pod_status != "pending":
            pod_log = get_k8s().logs(pod)
            if pod_log:
                content = pod_log.encode()[offset:]
            if status == "running":
                update_in(run, "status.last_update", now_date().isoformat())
                # the run is stored only when the pod reached a final status
                if pod_status == "failed":
                    update_in(run, "status.state", "error")
                    update_in(run, "status.error", "error, check logs")
                    get_db().store_run(db_session, run, uid, project)
                elif pod_status == "succeeded":
                    update_in(run, "status.state", "completed")
                    get_db().store_run(db_session, run, uid, project)
        return content, pod_status
Beispiel #5
0
def get_log(project, uid):
    """Serve the log of a run as a plain-text response.

    ``size`` and ``offset`` are read from the request query arguments. The
    log file is used when it exists; otherwise the run is read from the DB
    and the log is taken from the first logger pod found for it. While
    doing so, a run stored as "running" is updated in the DB when its pod
    failed, succeeded or could not be found.

    The status is returned in the ``pod_status`` response header. An
    unknown run yields the NOT_FOUND error built by ``json_error``.
    """
    size = int(request.args.get('size', '-1'))
    offset = int(request.args.get('offset', '0'))

    def plain_response(body, pod_status):
        return Response(body, mimetype='text/plain',
                        headers={"pod_status": pod_status})

    log_file = log_path(project, uid)
    if log_file.exists():
        # persisted log: served straight from the file
        with log_file.open('rb') as log_fp:
            log_fp.seek(offset)
            content = log_fp.read(size)
        return plain_response(content, '')

    run = _db.read_run(uid, project)
    if not run:
        return json_error(HTTPStatus.NOT_FOUND,
                          project=project, uid=uid)

    status = get_in(run, 'status.state', '')
    if not _k8s:
        return plain_response(b'', status)

    pods = _k8s.get_logger_pods(uid)
    if not pods:
        # a run marked as running without any pod is flagged as an error
        if status == 'running':
            update_in(run, 'status.state', 'error')
            update_in(run, 'status.error', 'pod not found, maybe terminated')
            _db.store_run(run, uid, project)
            status = 'failed'
        return plain_response(b'', status)

    # only the first reported pod is considered
    pod, pod_status = next(iter(pods.items()))
    pod_status = pod_status.lower()
    content = b''

    # TODO: handle in cron/tracking
    if pod_status != 'pending':
        pod_log = _k8s.logs(pod)
        if pod_log:
            content = pod_log.encode()[offset:]
        if status == 'running':
            update_in(run, 'status.last_update', now_date().isoformat())
            # the run is stored only when the pod reached a final status
            if pod_status == 'failed':
                update_in(run, 'status.state', 'error')
                update_in(run, 'status.error', 'error, check logs')
                _db.store_run(run, uid, project)
            elif pod_status == 'succeeded':
                update_in(run, 'status.state', 'completed')
                _db.store_run(run, uid, project)

    return plain_response(content, pod_status)
Beispiel #6
0
    def push(self, start, request, resp=None, op=None, error=None):
        """Push a request record (or an error record) to the output stream.

        :param start:   time the request handling started; on the success
                        path it is subtracted from ``now_date()``
        :param request: the request that was handled
        :param resp:    the response that was produced (success path)
        :param op:      the operation that was requested
        :param error:   when set, an error record is pushed right away,
                        regardless of sampling and batching

        On the success path only every ``stream_sample``-th call is
        recorded; with ``stream_batch`` > 1 the recorded calls are
        accumulated and pushed as a single record once the batch is full.
        Nothing is pushed when there is no output stream.
        """
        if error:
            # same guard as the success path: without a stream there is
            # nowhere to report to, and failing here would mask the error
            if not self.output_stream:
                return
            data = self.base_data()
            data["request"] = request
            data["op"] = op
            data["when"] = str(start)
            message = str(error)
            if self.verbose:
                message = f"{message}\n{traceback.format_exc()}"
            data["error"] = message
            self.output_stream.push([data])
            return

        self._sample_iter = (self._sample_iter + 1) % self.stream_sample
        if not self.output_stream or self._sample_iter != 0:
            return

        # full elapsed time in microseconds; timedelta.microseconds alone
        # is only the sub-second component and wraps every second
        elapsed = now_date() - start
        microsec = (
            (elapsed.days * 86400 + elapsed.seconds) * 1_000_000
            + elapsed.microseconds
        )
        # the model may not expose metrics at all
        metrics = getattr(self.model, "metrics", None)

        if self.stream_batch > 1:
            if self._batch_iter == 0:
                self._batch = []
            self._batch.append(
                [request, op, resp, str(start), microsec, metrics])
            self._batch_iter = (self._batch_iter + 1) % self.stream_batch

            # the counter wrapped around: the batch is full, flush it
            if self._batch_iter == 0:
                data = self.base_data()
                data["headers"] = [
                    "request",
                    "op",
                    "resp",
                    "when",
                    "microsec",
                    "metrics",
                ]
                data["values"] = self._batch
                self.output_stream.push([data])
        else:
            data = self.base_data()
            data["request"] = request
            data["op"] = op
            data["resp"] = resp
            data["when"] = str(start)
            data["microsec"] = microsec
            if metrics:
                data["metrics"] = metrics
            self.output_stream.push([data])
Beispiel #7
0
    def do_event(self, event, *args, **kwargs):
        """main model event handler method

        The operation is taken from the event path:

        - ``predict`` / ``infer`` / ``explain`` run the model, post-process
          the response and (when a model logger is set) log the call
        - ``ready`` (GET) reports the model health
        - empty path (GET) returns the model metadata
        - any other operation is served by a matching ``op_<name>`` method

        :raises ValueError: when the operation is not supported
        """
        start = now_date()
        op = event.path.strip("/")

        # operations that run the model, and the method serving each of them
        inference_methods = {
            "predict": "predict",
            "infer": "predict",
            "explain": "explain",
        }

        if op in inference_methods:
            request = self._pre_event_processing_actions(event, op)
            try:
                outputs = getattr(self, inference_methods[op])(request)
            except Exception as exc:
                # report the failure before letting it propagate
                if self._model_logger:
                    self._model_logger.push(start, request, op=op, error=exc)
                raise exc

            response = {
                "id": request["id"],
                "model_name": self.name,
                "outputs": outputs,
            }
            if self.version:
                response["model_version"] = self.version

            response = self.postprocess(response)
            if self._model_logger:
                self._model_logger.push(start, request, response, op)
            event.body = response
            return event

        if op == "ready" and event.method == "GET":
            # get model health operation
            event.terminated = True
            if self.ready:
                event.body = self.context.Response()
            else:
                event.body = self.context.Response(status_code=408,
                                                   body=b"model not ready")
            return event

        if op == "" and event.method == "GET":
            # get model metadata operation
            event.terminated = True
            event.body = {
                "name": self.name,
                "version": self.version,
                "inputs": [],
                "outputs": [],
            }
            if self.model_spec:
                event.body["inputs"] = self.model_spec.inputs
                event.body["outputs"] = self.model_spec.outputs
            return event

        # custom operation (child methods starting with "op_")
        custom_handler_name = "op_" + op
        if hasattr(self, custom_handler_name):
            event.body = getattr(self, custom_handler_name)(event)
            return event

        raise ValueError(
            f"illegal model operation {op}, method={event.method}")
Beispiel #8
0
    def do_event(self, event, *args, **kwargs):
        """Handles incoming requests.

        With no model name and no route, the list of available models is
        returned. A request addressed to the router itself is run on the
        routed models in parallel and the combined result is returned;
        any other request is forwarded to the matching route.

        Parameters
        ----------
        event : nuclio.Event
            Incoming request as a nuclio.Event.

        Returns
        -------
        Response
            Event response after running the requested logic
        """
        start = now_date()

        # Handle and verify the request
        event = self.preprocess(event)
        event = self._pre_handle_event(event)

        # Should we terminate the event?
        if getattr(event, "terminated", False):
            return event

        # Extract route information
        name, route, subpath = self._resolve_route(event.body, event.path)
        self.context.logger.debug(f"router run model {name}, op={subpath}")
        event.path = subpath

        # If no model name was given and no operation, return the model list
        if not name and route is None:
            setattr(event, "terminated", True)
            event.body = {"models": list(self.routes.keys()) + [self.name]}
            return event

        # Verify we use the V2 protocol
        request = self.validate(event.body)

        if name == self.name:
            # A router operation
            predictions = self._parallel_run(event)
            votes = self._apply_logic(predictions)
            # Format the prediction response like the regular
            # model's responses
            if self.format_response_with_col_name_flag:
                votes = {self.prediction_col_name: votes}
            response = copy.copy(event)
            response_body = {
                "id": event.id,
                # the name of the responding model (the router itself),
                # not the prediction result
                "model_name": self.name,
                "outputs": votes,
            }
            if self.version:
                response_body["model_version"] = self.version
            response.body = response_body
        else:
            # A specific model event
            response = route.run(event)
            event.body = response.body if response else None

        response = self.postprocess(response)

        if self._model_logger and self.log_router:
            # the logged request carries the id assigned to the response
            if "id" not in request:
                request["id"] = response.body["id"]
            self._model_logger.push(start, request, response.body)
        return response