def retrieve(self, request, *args, **kwargs):
    data = {
        "key": conf.get(ORGANIZATION_KEY) or get_dummy_key(),
        "version": conf.get(PLATFORM_VERSION),
        "dist": conf.get(PLATFORM_DIST),
    }
    return Response(data)
def runs_stop(
    run_id: int, run: Optional[BaseRun], update_status=False, message=None
) -> bool:
    run = get_run(run_id=run_id, run=run)
    if not run:
        return True

    stopped = True
    should_stop = (
        LifeCycle.is_k8s_stoppable(run.status) or run.status == V1Statuses.STOPPING
    )

    if run.is_managed and should_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            stopped = manager.stop(
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
            )

    if not stopped:
        return False

    if not update_status:
        return True

    new_run_stop_status(run=run, message=message)
    return True
def runs_artifacts_clean(run: BaseRun):
    if run.is_managed:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            op = operations.get_cleaner_operation(
                connection=settings.AGENT_CONFIG.artifacts_store,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
            )
            try:
                manager.make_and_create(
                    content=op,
                    owner_name=run.project.owner.name,
                    project_name=run.project.name,
                    run_name=run.name,
                    run_uuid=run.uuid.hex,
                    run_kind=run.kind,
                    namespace=conf.get(K8S_NAMESPACE),
                    in_cluster=in_cluster,
                )
                return
            except Exception as e:
                _logger.warning(
                    "Failed to run cleaning job for %s.%s.%s.\n %s",
                    run.project.owner.name,
                    run.project.name,
                    run.uuid.hex,
                    e,
                )
def runs_start(run_id: int, run: Optional[BaseRun]):
    run = get_run(run_id=run_id, run=run)
    if not run:
        return

    if not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=condition)

    def _log_error(exc: Exception, message: str = None):
        message = message or "Could not start the operation.\n"
        message += "error: {}\n{}".format(repr(exc), traceback.format_exc())
        cond = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=message,
        )
        new_run_status(run=run, condition=cond)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
        return
    except (PolyaxonK8SError, ApiException) as e:
        _log_error(
            exc=e, message="Kubernetes manager could not start the operation.\n"
        )
    except PolypodException as e:
        _log_error(exc=e, message="Failed converting the run manifest.\n")
    except Exception as e:
        _log_error(exc=e, message="Failed with unknown exception.\n")
def runs_stop(
    run_id: int,
    run: Optional[BaseRun],
    update_status=False,
    message=None,
    clean=False,
) -> bool:
    run = get_run(run_id=run_id, run=run)
    if not run:
        return True

    stopped = True
    should_stop = (
        LifeCycle.is_k8s_stoppable(run.status) or run.status == V1Statuses.STOPPING
    )

    def _clean():
        # Closure over `run` and `in_cluster` from the enclosing scope; it is
        # only called after `in_cluster` has been assigned below.
        try:
            manager.clean(
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
            )
        except (PolyaxonK8SError, ApiException) as e:
            _logger.warning(
                "Something went wrong, the run `%s` could not be stopped, error %s",
                run.uuid,
                e,
            )
            return False

    if run.is_managed and should_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            if clean:
                _clean()
            try:
                stopped = manager.stop(
                    run_uuid=run.uuid.hex,
                    run_kind=run.kind,
                    namespace=conf.get(K8S_NAMESPACE),
                    in_cluster=in_cluster,
                )
            except (PolyaxonK8SError, ApiException) as e:
                _logger.warning(
                    "Something went wrong, the run `%s` could not be stopped, error %s",
                    run.uuid,
                    e,
                )
                return False

    if not stopped:
        return False

    if not update_status:
        return True

    new_run_stop_status(run=run, message=message)
    return True
def handle_run_created(workers_backend, event: "Event") -> None:  # noqa: F821
    """Handles creation, resume, and restart"""
    eager = False
    if (
        event.instance
        and event.instance.status != V1Statuses.RESUMING
        and (event.instance.meta_info or {}).get(META_EAGER_MODE)
    ):
        eager = True
    if not eager:
        eager = (
            not event.data["is_managed"] and event.instance and event.instance.content
        )

    # Run is not managed by Polyaxon
    if not event.data["is_managed"] and not eager:
        return

    # Run is managed by a pipeline
    if event.data.get("pipeline_id") is not None:
        return

    if conf.get(SCHEDULER_ENABLED) and not eager:
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_PREPARE, kwargs={"run_id": event.instance_id}
        )
        return

    # Eager mode
    manager.runs_prepare(run_id=event.instance_id, run=event.instance, eager=True)
def handle_run_deleted(workers_backend, event: "Event") -> None:  # noqa: F821
    if conf.get(SCHEDULER_ENABLED):
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_DELETE, kwargs={"run_id": event.instance_id}
        )
        return

    manager.runs_delete(run_id=event.instance_id, run=event.instance)
def test_serialize_one(self):
    obj1 = self.create_one_with_related()
    data = self.serializer_class(obj1).data

    assert set(data.keys()) == self.expected_keys
    assert data.pop("uuid") == obj1.uuid.hex
    assert data.pop("original") == {
        "uuid": obj1.original.uuid.hex,
        "name": obj1.original.name,
        "kind": obj1.cloning_kind,
    }
    assert data.pop("pipeline") == {
        "uuid": obj1.pipeline.uuid.hex,
        "name": obj1.pipeline.name,
        "kind": obj1.pipeline.kind,
    }
    assert data.pop("settings") == {"namespace": conf.get(K8S_NAMESPACE)}
    data.pop("created_at")
    data.pop("updated_at")
    data.pop("started_at", None)
    data.pop("finished_at", None)

    for k, v in data.items():
        assert getattr(obj1, k) == v
def runs_start(run_id: int, run: Optional[BaseRun]):
    run = get_run(run_id=run_id, run=run)
    if not run:
        return

    if not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    condition = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=condition)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except PolyaxonK8SError as e:
        condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message="Could not start the job {}".format(e),
        )
        new_run_status(run=run, condition=condition)
def get_urlpatterns(app_patterns: List, ui_urlpatterns: List):
    if conf.get(UI_ADMIN_ENABLED):
        app_patterns += [re_path(r"^{}/".format(ADMIN_V1), admin.site.urls)]

    urlpatterns = app_patterns + [
        re_path(r"^healthz/?$", HealthView.as_view(), name="health_check"),
    ]
    urlpatterns += get_ui_urlpatterns(ui_urlpatterns)
    return urlpatterns
def handle_run_stopped_triggered(workers_backend, event: "Event") -> None:  # noqa: F821
    run = manager.get_run(run_id=event.instance_id, run=event.instance)
    # Guard: get_run may return None, as the other handlers assume.
    if not run:
        return

    if run.is_managed and conf.get(SCHEDULER_ENABLED):
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_STOP, kwargs={"run_id": event.instance_id}
        )
        return

    manager.runs_stop(run_id=event.instance_id, run=event.instance)
def get_urlpatterns(app_patterns: List, ui_urlpatterns: List):
    if conf.get(ADMIN_VIEW_ENABLED):
        app_patterns += [re_path(r"^_admin/", admin.site.urls)]

    urlpatterns = app_patterns + [
        re_path(r"^healthz/?$", HealthView.as_view(), name="health_check"),
        re_path(r"^50x.html$", Handler50xView.as_view(), name="50x"),
        re_path(r"^permission.html$", Handler403View.as_view(), name="permission"),
        re_path(r"^404.html$", Handler404View.as_view(), name="404"),
    ]
    urlpatterns += get_ui_urlpatterns(ui_urlpatterns)
    return urlpatterns
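# A minimal sketch of how a Django root URLConf might wire `get_urlpatterns`
# together. The `app_patterns` and `ui_urlpatterns` values below are
# hypothetical placeholders for route lists collected elsewhere in the project.
app_patterns = []  # e.g. API and service routes
ui_urlpatterns = []  # e.g. UI routes consumed by get_ui_urlpatterns
urlpatterns = get_urlpatterns(app_patterns=app_patterns, ui_urlpatterns=ui_urlpatterns)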
def handle_run_deleted(workers_backend, event: "Event") -> None:  # noqa: F821
    run = manager.get_run(run_id=event.instance_id, run=event.instance)
    if not run:
        return

    if not run.is_managed:
        run.delete()
        return

    if conf.get(SCHEDULER_ENABLED):
        run.delete_in_progress()
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_DELETE, kwargs={"run_id": run.id}
        )
    else:
        manager.runs_delete(run_id=run.id, run=run)
def get(self, request, *args, **kwargs):
    self.init_config()
    config = self.get_config()
    if config and config.should_check():
        config.version = pkg.VERSION
        key = conf.get(ORGANIZATION_KEY) or get_dummy_key()
        config.compatibility = get_compatibility(
            key=key,
            service=PolyaxonServices.PLATFORM,
            version=config.version,
            is_cli=False,
        )
        config.last_check = now()
        self.write_config(config)
    return Response(status=status.HTTP_200_OK)
def handle_new_artifacts(workers_backend, event: "Event") -> None:  # noqa: F821
    artifacts = event.data.get("artifacts")
    if not artifacts:
        return

    if conf.get(SCHEDULER_ENABLED):
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_SET_ARTIFACTS,
            kwargs={"run_id": event.instance_id, "artifacts": artifacts},
        )
        return

    manager.runs_set_artifacts(
        run_id=event.instance_id, run=event.instance, artifacts=artifacts
    )
def handle_run_created(workers_backend, event: "Event") -> None:  # noqa: F821
    """Handles creation, resume, and restart"""
    # Run is not managed by Polyaxon
    if not event.data["is_managed"]:
        return

    # Run is managed by a pipeline
    if event.data.get("pipeline_id") is not None:
        return

    if conf.get(SCHEDULER_ENABLED):
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_PREPARE, kwargs={"run_id": event.instance_id}
        )
        return

    # Eager mode
    manager.runs_prepare(run_id=event.instance_id, run=event.instance, eager=True)
def get_settings(self, obj):
    return {"namespace": conf.get(K8S_NAMESPACE)}
def js_offline(request):
    return {"js_offline": conf.get(JS_OFFLINE)}
def ui_base_url(request):
    return {"ui_base_url": conf.get(UI_BASE_URL)}
def ui_offline(request):
    return {"ui_offline": conf.get(UI_OFFLINE)}
def send(task_name, kwargs=None, **options):
    options["ignore_result"] = options.get("ignore_result", True)
    if "countdown" not in options:
        options["countdown"] = conf.get(SCHEDULER_GLOBAL_COUNTDOWN)
    return app.send_task(task_name, kwargs=kwargs, **options)
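# A minimal usage sketch of the `send` wrapper above, reusing task names seen
# in the handlers in this section; `example_enqueue` is a hypothetical helper,
# not part of the codebase.
def example_enqueue(run_id: int):
    # Enqueued with the defaults injected by `send`: ignore_result=True and
    # the global scheduler countdown.
    send(CoreSchedulerCeleryTasks.RUNS_STOP, kwargs={"run_id": run_id})
    # Any Celery option can still be overridden per call, e.g. no delay:
    send(CoreSchedulerCeleryTasks.RUNS_DELETE, kwargs={"run_id": run_id}, countdown=0)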
def ui_enabled(request):
    return {"ui_enabled": conf.get(UI_ENABLED)}
def assets_version(request):
    return {
        "assets_version": "{}.{}".format(pkg.VERSION, conf.get(UI_ASSETS_VERSION))
    }
def test_version(self):
    resp = self.client.get(self.installation_version)
    assert resp.status_code == status.HTTP_200_OK
    assert resp.data["version"] == conf.get(PLATFORM_VERSION)
    assert resp.data["dist"] == conf.get(PLATFORM_DIST)
    assert set(resp.data.keys()) == {"dist", "key", "version"}
def retrieve(self, request, *args, **kwargs):
    namespace = {"namespace": conf.get(K8S_NAMESPACE)}
    return Response(namespace)
def retrieve(self, request, *args, **kwargs):
    versions = {
        "platform_version": conf.get(PLATFORM_VERSION),
        "platform_dist": conf.get(PLATFORM_DIST),
    }
    return Response(versions)