예제 #1
0
 def retrieve(self, request, *args, **kwargs):
     """Respond with the installation key, platform version and dist."""
     # Fall back to a dummy key when no organization key is configured.
     organization_key = conf.get(ORGANIZATION_KEY)
     if not organization_key:
         organization_key = get_dummy_key()
     payload = {
         "key": organization_key,
         "version": conf.get(PLATFORM_VERSION),
         "dist": conf.get(PLATFORM_DIST),
     }
     return Response(payload)
예제 #2
0
def runs_stop(run_id: int,
              run: Optional[BaseRun],
              update_status=False,
              message=None) -> bool:
    """Stop a run through the manager and optionally record the stop status.

    Returns ``True`` when the run cannot be resolved, when no stop call was
    needed, or when the manager reports success; returns ``False`` when the
    manager reports that the run was not stopped.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        # Nothing to stop.
        return True

    should_stop = (LifeCycle.is_k8s_stoppable(run.status)
                   or run.status == V1Statuses.STOPPING)
    if run.is_managed and should_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            was_stopped = manager.stop(
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
            )
            if not was_stopped:
                return False

    if update_status:
        new_run_stop_status(run=run, message=message)
    return True
예제 #3
0
def runs_artifacts_clean(run: "BaseRun"):  # noqa: F821
    """Create a cleaner operation for the artifacts of a managed run.

    Nothing happens unless the run is managed, the ``K8S_IN_CLUSTER`` option
    is truthy and the run is a service or a job. Creating the cleaning
    operation is best effort: any exception raised by the manager is logged
    as a warning and not propagated.

    Args:
        run: the run whose artifacts should be cleaned.
    """
    if not run.is_managed:
        return

    in_cluster = conf.get(K8S_IN_CLUSTER)
    if not (in_cluster and (run.is_service or run.is_job)):
        return

    op = operations.get_cleaner_operation(
        connection=settings.AGENT_CONFIG.artifacts_store,
        run_uuid=run.uuid.hex,
        run_kind=run.kind,
    )
    try:
        # The in-cluster/kind guard was already checked above, so it is not
        # evaluated a second time here.
        manager.make_and_create(
            content=op,
            owner_name=run.project.owner.name,
            project_name=run.project.name,
            run_name=run.name,
            run_uuid=run.uuid.hex,
            run_kind=run.kind,
            namespace=conf.get(K8S_NAMESPACE),
            in_cluster=in_cluster,
        )
    except Exception as e:  # deliberate best effort: log and carry on
        _logger.warning(
            "Failed to run cleaning job for %s.%s.%s.\n %s",
            run.project.owner.name,
            run.project.name,
            run.uuid.hex,
            e,
        )
예제 #4
0
def runs_start(run_id: int, run: Optional[BaseRun]):
    """Mark a compiled, managed run as queued and start it via the manager.

    Does nothing when the run cannot be resolved, is not managed, or is not
    in a compiled status. Exceptions raised while starting are recorded on
    the run as a ``FAILED`` status condition instead of being propagated.
    """
    run = get_run(run_id=run_id, run=run)
    if not run or not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    queued = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=queued)

    def _mark_failed(exc: Exception, message: str = None):
        # Called from inside an ``except`` block so that
        # ``traceback.format_exc()`` captures the active exception.
        prefix = message or "Could not start the operation.\n"
        details = "error: {}\n{}".format(repr(exc), traceback.format_exc())
        failed = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=prefix + details,
        )
        new_run_status(run=run, condition=failed)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except (PolyaxonK8SError, ApiException) as err:
        _mark_failed(
            err, "Kubernetes manager could not start the operation.\n")
    except PolypodException as err:
        _mark_failed(err, "Failed converting the run manifest.\n")
    except Exception as err:
        _mark_failed(err, "Failed with unknown exception.\n")
예제 #5
0
def runs_stop(
    run_id: int,
    run: Optional[BaseRun],
    update_status=False,
    message=None,
    clean=False,
) -> bool:
    """Stop a run through the manager, optionally cleaning it first.

    Returns ``True`` when the run cannot be resolved, when no stop call was
    needed, or when the manager reports success. Returns ``False`` when the
    stop call raises a K8s/API error or reports that the run was not stopped.
    A failing clean step is only logged; it does not abort the stop.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return True

    should_stop = (LifeCycle.is_k8s_stoppable(run.status)
                   or run.status == V1Statuses.STOPPING)

    def _warn_not_stopped(error):
        # Shared warning for both the clean and the stop calls.
        _logger.warning(
            "Something went wrong, the run `%s` could not be stopped, error %s",
            run.uuid,
            error,
        )

    if run.is_managed and should_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            if clean:
                try:
                    manager.clean(
                        run_uuid=run.uuid.hex,
                        run_kind=run.kind,
                        namespace=conf.get(K8S_NAMESPACE),
                        in_cluster=in_cluster,
                    )
                except (PolyaxonK8SError, ApiException) as err:
                    _warn_not_stopped(err)

            try:
                was_stopped = manager.stop(
                    run_uuid=run.uuid.hex,
                    run_kind=run.kind,
                    namespace=conf.get(K8S_NAMESPACE),
                    in_cluster=in_cluster,
                )
            except (PolyaxonK8SError, ApiException) as err:
                _warn_not_stopped(err)
                return False

            if not was_stopped:
                return False

    if update_status:
        new_run_stop_status(run=run, message=message)
    return True
예제 #6
0
def handle_run_created(workers_backend, event: "Event") -> None:  # noqa: F821
    """Handle run creation, resume, and restart events.

    Unmanaged non-eager runs and runs that belong to a pipeline are ignored.
    Otherwise the run is prepared through the scheduler task when the
    scheduler is enabled and the run is not eager, or prepared directly.
    """
    instance = event.instance
    # Eager when the instance is not resuming and its meta info asks for it.
    eager = bool(
        instance
        and instance.status != V1Statuses.RESUMING
        and (instance.meta_info or {}).get(META_EAGER_MODE)
    )
    is_managed = event.data["is_managed"]
    if not eager:
        # An unmanaged run that carries content is also handled eagerly.
        eager = not is_managed and instance and instance.content

    # Run is not managed by Polyaxon
    if not is_managed and not eager:
        return
    # Run is managed by a pipeline
    if event.data.get("pipeline_id") is not None:
        return

    if conf.get(SCHEDULER_ENABLED) and not eager:
        workers_backend.send(
            CoreSchedulerCeleryTasks.RUNS_PREPARE,
            kwargs={"run_id": event.instance_id},
        )
        return

    # Eager mode
    manager.runs_prepare(
        run_id=event.instance_id, run=event.instance, eager=True
    )
예제 #7
0
def handle_run_deleted(workers_backend, event: "Event") -> None:  # noqa: F821
    """Delete a run directly, or via the scheduler task when it is enabled."""
    if not conf.get(SCHEDULER_ENABLED):
        manager.runs_delete(run_id=event.instance_id, run=event.instance)
        return

    workers_backend.send(
        CoreSchedulerCeleryTasks.RUNS_DELETE,
        kwargs={"run_id": event.instance_id},
    )
    def test_serialize_one(self):
        """Check the serialized form of one object that has related objects."""
        instance = self.create_one_with_related()

        serialized = self.serializer_class(instance).data

        assert set(serialized.keys()) == self.expected_keys
        assert serialized.pop("uuid") == instance.uuid.hex

        # Nested representations are compared as whole dictionaries.
        original = serialized.pop("original")
        assert original == {
            "uuid": instance.original.uuid.hex,
            "name": instance.original.name,
            "kind": instance.cloning_kind,
        }
        pipeline = serialized.pop("pipeline")
        assert pipeline == {
            "uuid": instance.pipeline.uuid.hex,
            "name": instance.pipeline.name,
            "kind": instance.pipeline.kind,
        }
        settings_payload = serialized.pop("settings")
        assert settings_payload == {"namespace": conf.get(K8S_NAMESPACE)}

        # Timestamps are dropped without comparison; the last two are optional.
        for required_field in ("created_at", "updated_at"):
            serialized.pop(required_field)
        for optional_field in ("started_at", "finished_at"):
            serialized.pop(optional_field, None)

        # Every remaining key must equal the attribute of the same name.
        for field_name, field_value in serialized.items():
            assert getattr(instance, field_name) == field_value
예제 #9
0
def runs_start(run_id: int, run: Optional[BaseRun]):
    """Mark a compiled, managed run as queued and start it via the manager.

    Does nothing when the run cannot be resolved, is not managed, or is not
    in a compiled status. A ``PolyaxonK8SError`` raised while starting is
    recorded on the run as a ``FAILED`` status condition.
    """
    run = get_run(run_id=run_id, run=run)
    if not run or not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    queued = V1StatusCondition.get_condition(
        type=V1Statuses.QUEUED,
        status="True",
        reason="PolyaxonRunQueued",
        message="Run is queued",
    )
    new_run_status(run=run, condition=queued)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except PolyaxonK8SError as err:
        failed = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message="Could not start the job {}".format(err),
        )
        new_run_status(run=run, condition=failed)
예제 #10
0
def get_urlpatterns(app_patterns: List, ui_urlpatterns: List):
    """Build the full URL pattern list for the application.

    The result contains the app patterns, the admin site route when
    ``UI_ADMIN_ENABLED`` is truthy, the health check route, and the patterns
    returned by ``get_ui_urlpatterns``.

    Args:
        app_patterns: URL patterns of the application; left unmodified.
        ui_urlpatterns: patterns forwarded to ``get_ui_urlpatterns``.

    Returns:
        A new list of URL patterns.
    """
    # Work on a copy: ``+=`` on the argument would extend the caller's list
    # in place, so repeated calls would keep appending the admin route.
    urlpatterns = list(app_patterns)
    if conf.get(UI_ADMIN_ENABLED):
        urlpatterns.append(
            re_path(r"^{}/".format(ADMIN_V1), admin.site.urls))

    urlpatterns.append(
        re_path(r"^healthz/?$", HealthView.as_view(), name="health_check"))
    urlpatterns += get_ui_urlpatterns(ui_urlpatterns)

    return urlpatterns
예제 #11
0
def handle_run_stopped_triggered(workers_backend,
                                 event: "Event") -> None:  # noqa: F821
    """Stop a run directly, or via the scheduler task for managed runs.

    Managed runs are handed to the ``RUNS_STOP`` scheduler task when the
    scheduler is enabled; every other run is stopped synchronously through
    the manager. Nothing happens when the run cannot be resolved.
    """
    run = manager.get_run(run_id=event.instance_id, run=event.instance)
    if not run:
        # Guard against an unresolved run: without this check the attribute
        # access below would raise ``AttributeError``.
        return

    if run.is_managed and conf.get(SCHEDULER_ENABLED):
        workers_backend.send(CoreSchedulerCeleryTasks.RUNS_STOP,
                             kwargs={"run_id": event.instance_id})
        return

    manager.runs_stop(run_id=event.instance_id, run=event.instance)
예제 #12
0
def get_urlpatterns(app_patterns: List, ui_urlpatterns: List):
    """Build the full URL pattern list for the application.

    The result contains the app patterns, the admin site route when
    ``ADMIN_VIEW_ENABLED`` is truthy, the health check and error page
    routes, and the patterns returned by ``get_ui_urlpatterns``.

    Args:
        app_patterns: URL patterns of the application; left unmodified.
        ui_urlpatterns: patterns forwarded to ``get_ui_urlpatterns``.

    Returns:
        A new list of URL patterns.
    """
    # Work on a copy: ``+=`` on the argument would extend the caller's list
    # in place, so repeated calls would keep appending the admin route.
    urlpatterns = list(app_patterns)
    if conf.get(ADMIN_VIEW_ENABLED):
        urlpatterns.append(re_path(r"^_admin/", admin.site.urls))

    # NOTE(review): the dots in the ``*.html`` patterns are unescaped, so
    # they match any character; left as-is to keep routing unchanged.
    urlpatterns += [
        re_path(r"^healthz/?$", HealthView.as_view(), name="health_check"),
        re_path(r"^50x.html$", Handler50xView.as_view(), name="50x"),
        re_path(
            r"^permission.html$", Handler403View.as_view(), name="permission"),
        re_path(r"^404.html$", Handler404View.as_view(), name="404"),
    ]
    urlpatterns += get_ui_urlpatterns(ui_urlpatterns)

    return urlpatterns
예제 #13
0
def handle_run_deleted(workers_backend, event: "Event") -> None:  # noqa: F821
    """Delete a run, going through the scheduler for managed runs if enabled.

    Unresolved runs are ignored and unmanaged runs are deleted right away.
    """
    run = manager.get_run(run_id=event.instance_id, run=event.instance)
    if not run:
        return

    if not run.is_managed:
        run.delete()
        return

    if not conf.get(SCHEDULER_ENABLED):
        manager.runs_delete(run_id=run.id, run=run)
        return

    # Flag the run as being deleted before handing off to the scheduler task.
    run.delete_in_progress()
    workers_backend.send(
        CoreSchedulerCeleryTasks.RUNS_DELETE,
        kwargs={"run_id": run.id},
    )
예제 #14
0
 def _clean():
     """Ask the manager to clean the run from the enclosing scope.

     Returns ``False`` after logging a warning when a K8s/API error is
     raised, and ``None`` otherwise.
     """
     try:
         manager.clean(
             run_uuid=run.uuid.hex,
             run_kind=run.kind,
             namespace=conf.get(K8S_NAMESPACE),
             in_cluster=in_cluster,
         )
     except (PolyaxonK8SError, ApiException) as err:
         _logger.warning(
             "Something went wrong, the run `%s` could not be stopped, error %s",
             run.uuid,
             err,
         )
         return False
     else:
         return None
예제 #15
0
파일: health.py 프로젝트: zhaohb/polyaxon
 def get(self, request, *args, **kwargs):
     """Refresh the compatibility info when due, then answer 200 OK."""
     self.init_config()
     config = self.get_config()
     check_is_due = config and config.should_check()
     if check_is_due:
         config.version = pkg.VERSION
         # Fall back to a dummy key when no organization key is configured.
         config.compatibility = get_compatibility(
             key=conf.get(ORGANIZATION_KEY) or get_dummy_key(),
             service=PolyaxonServices.PLATFORM,
             version=config.version,
             is_cli=False,
         )
         config.last_check = now()
         self.write_config(config)
     return Response(status=status.HTTP_200_OK)
예제 #16
0
파일: run.py 프로젝트: opentechfn/polyaxon
def handle_new_artifacts(workers_backend, event: "Event") -> None:  # noqa: F821
    """Store the artifacts carried by the event on its run.

    Events without artifacts are ignored. The artifacts are set through the
    scheduler task when the scheduler is enabled, or directly otherwise.
    """
    artifacts = event.data.get("artifacts")
    if not artifacts:
        return

    if not conf.get(SCHEDULER_ENABLED):
        manager.runs_set_artifacts(
            run_id=event.instance_id, run=event.instance, artifacts=artifacts
        )
        return

    task_kwargs = {"run_id": event.instance_id, "artifacts": artifacts}
    workers_backend.send(
        CoreSchedulerCeleryTasks.RUNS_SET_ARTIFACTS,
        kwargs=task_kwargs,
    )
예제 #17
0
파일: run.py 프로젝트: zhaohb/polyaxon
def handle_run_created(workers_backend, event: "Event") -> None:  # noqa: F821
    """Handle run creation, resume, and restart events.

    Runs that are not managed, or that belong to a pipeline, are ignored.
    """
    not_managed = not event.data["is_managed"]
    # Skip runs not managed by Polyaxon and runs managed by a pipeline.
    if not_managed or event.data.get("pipeline_id") is not None:
        return

    if not conf.get(SCHEDULER_ENABLED):
        # Eager mode
        manager.runs_prepare(
            run_id=event.instance_id, run=event.instance, eager=True
        )
        return

    workers_backend.send(
        CoreSchedulerCeleryTasks.RUNS_PREPARE,
        kwargs={"run_id": event.instance_id},
    )
예제 #18
0
 def get_settings(self, obj):
     """Return a dict holding the value of the ``K8S_NAMESPACE`` option."""
     namespace = conf.get(K8S_NAMESPACE)
     return {"namespace": namespace}
예제 #19
0
def js_offline(request):
    """Expose the ``JS_OFFLINE`` option under the ``js_offline`` key."""
    offline = conf.get(JS_OFFLINE)
    return {"js_offline": offline}
예제 #20
0
def ui_base_url(request):
    """Expose the ``UI_BASE_URL`` option under the ``ui_base_url`` key."""
    base_url = conf.get(UI_BASE_URL)
    return {"ui_base_url": base_url}
예제 #21
0
def ui_offline(request):
    """Expose the ``UI_OFFLINE`` option under the ``ui_offline`` key."""
    offline = conf.get(UI_OFFLINE)
    return {"ui_offline": offline}
예제 #22
0
def send(task_name, kwargs=None, **options):
    """Send a task by name through ``app.send_task``.

    ``ignore_result`` defaults to ``True`` and ``countdown`` defaults to the
    ``SCHEDULER_GLOBAL_COUNTDOWN`` option when the caller does not set them.
    """
    options.setdefault("ignore_result", True)
    # Only read the configured countdown when the caller did not supply one.
    if "countdown" not in options:
        options["countdown"] = conf.get(SCHEDULER_GLOBAL_COUNTDOWN)
    return app.send_task(task_name, kwargs=kwargs, **options)
예제 #23
0
def ui_enabled(request):
    """Expose the ``UI_ENABLED`` option under the ``ui_enabled`` key."""
    enabled = conf.get(UI_ENABLED)
    return {"ui_enabled": enabled}
예제 #24
0
def assets_version(request):
    """Expose the package version joined with ``UI_ASSETS_VERSION`` by a dot."""
    version = "{}.{}".format(pkg.VERSION, conf.get(UI_ASSETS_VERSION))
    return {"assets_version": version}
예제 #25
0
 def test_version(self):
     """The installation endpoint returns the version, dist and key fields."""
     response = self.client.get(self.installation_version)
     assert response.status_code == status.HTTP_200_OK

     payload = response.data
     assert payload["version"] == conf.get(PLATFORM_VERSION)
     assert payload["dist"] == conf.get(PLATFORM_DIST)
     assert set(response.data.keys()) == {'dist', 'key', 'version'}
예제 #26
0
 def retrieve(self, request, *args, **kwargs):
     """Respond with the value of the ``K8S_NAMESPACE`` option."""
     payload = {"namespace": conf.get(K8S_NAMESPACE)}
     return Response(payload)
예제 #27
0
 def retrieve(self, request, *args, **kwargs):
     """Respond with the configured platform version and distribution."""
     platform_version = conf.get(PLATFORM_VERSION)
     platform_dist = conf.get(PLATFORM_DIST)
     payload = {
         "platform_version": platform_version,
         "platform_dist": platform_dist,
     }
     return Response(payload)