Exemple #1
0
 def test_is_pending(self):
     """`is_pending` is False for None and True only for PENDING_VALUES."""
     assert LifeCycle.is_pending(None) is False
     for status in LifeCycle.VALUES:
         # Membership in PENDING_VALUES is the expected answer for each status.
         expected = status in LifeCycle.PENDING_VALUES
         assert LifeCycle.is_pending(status) is expected
Exemple #2
0
 def test_is_starting(self):
     """`is_starting` is False for None and True only for STARTING."""
     assert LifeCycle.is_starting(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.STARTING)
         assert LifeCycle.is_starting(status) is expected
Exemple #3
0
 def test_can_check_heartbeat(self):
     """`can_check_heartbeat` is False for None and mirrors `is_running`."""
     assert LifeCycle.can_check_heartbeat(None) is False
     for status in LifeCycle.VALUES:
         # The heartbeat check is expected exactly when the status is running.
         expected = bool(LifeCycle.is_running(status))
         assert LifeCycle.can_check_heartbeat(status) is expected
Exemple #4
0
 def test_is_unschedulable(self):
     """`is_unschedulable` is False for None and True only for UNSCHEDULABLE."""
     assert LifeCycle.is_unschedulable(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.UNSCHEDULABLE)
         assert LifeCycle.is_unschedulable(status) is expected
Exemple #5
0
 def test_failed(self):
     """`failed` is False for None and True only for FAILED / UPSTREAM_FAILED."""
     assert LifeCycle.failed(None) is False
     failed_statuses = {V1Statuses.FAILED, V1Statuses.UPSTREAM_FAILED}
     for status in LifeCycle.VALUES:
         expected = status in failed_statuses
         assert LifeCycle.failed(status) is expected
Exemple #6
0
 def test_skipped(self):
     """`skipped` is False for None and True only for SKIPPED."""
     assert LifeCycle.skipped(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.SKIPPED)
         assert LifeCycle.skipped(status) is expected
Exemple #7
0
 def test_is_done(self):
     """`is_done` is False for None and True only for DONE_VALUES."""
     assert LifeCycle.is_done(None) is False
     for status in LifeCycle.VALUES:
         expected = status in LifeCycle.DONE_VALUES
         assert LifeCycle.is_done(status) is expected
Exemple #8
0
 def test_succeeded(self):
     """`succeeded` is False for None and True only for SUCCEEDED."""
     assert LifeCycle.succeeded(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.SUCCEEDED)
         assert LifeCycle.succeeded(status) is expected
Exemple #9
0
 def test_is_stoppable(self):
     """`is_stoppable` is True for None and for every status that is not done."""
     assert LifeCycle.is_stoppable(None) is True
     for status in LifeCycle.VALUES:
         # A status is expected to be stoppable exactly when it is not done.
         expected = not LifeCycle.is_done(status)
         assert LifeCycle.is_stoppable(status) is expected
Exemple #10
0
 def test_is_stopping(self):
     """`is_stopping` is False for None and True only for STOPPING.

     Named with the `test_` prefix so that default pytest discovery collects
     it; without the prefix these assertions never ran.
     """
     assert LifeCycle.is_stopping(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.STOPPING)
         assert LifeCycle.is_stopping(status) is expected
Exemple #11
0
 def test_is_unknown(self):
     """`is_unknown` is False for None and True only for UNKNOWN."""
     assert LifeCycle.is_unknown(None) is False
     for status in LifeCycle.VALUES:
         expected = bool(status == V1Statuses.UNKNOWN)
         assert LifeCycle.is_unknown(status) is expected
 def test_is_compilable(self):
     """`is_compilable` is False for None and True only for COMPILABLE_VALUES."""
     # The None case must exercise `is_compilable` itself; it previously
     # asserted on `is_unschedulable`, leaving this predicate's None handling
     # untested.
     assert LifeCycle.is_compilable(None) is False
     for status in V1Statuses.allowable_values:
         expected = status in LifeCycle.COMPILABLE_VALUES
         assert LifeCycle.is_compilable(status) is expected
Exemple #13
0
 def test_is_running(self):
     """`is_running` is False for None and True only for RUNNING_VALUES."""
     assert LifeCycle.is_running(None) is False
     for status in LifeCycle.VALUES:
         expected = status in LifeCycle.RUNNING_VALUES
         assert LifeCycle.is_running(status) is expected
 def test_is_unschedulable(self):
     """Check `is_unschedulable` against None and every allowable status."""
     assert LifeCycle.is_unschedulable(None) is False
     for status in V1Statuses.allowable_values:
         # Only the UNSCHEDULABLE status itself should be reported as such.
         should_match = bool(status == V1Statuses.UNSCHEDULABLE)
         assert LifeCycle.is_unschedulable(status) is should_match
 def test_can_check_heartbeat(self):
     """Check `can_check_heartbeat` against None and every allowable status."""
     assert LifeCycle.can_check_heartbeat(None) is False
     for status in V1Statuses.allowable_values:
         # Heartbeat checks are expected only for statuses that are running.
         should_match = bool(LifeCycle.is_running(status))
         assert LifeCycle.can_check_heartbeat(status) is should_match
 def test_is_starting(self):
     """Check `is_starting` against None and every allowable status."""
     assert LifeCycle.is_starting(None) is False
     for status in V1Statuses.allowable_values:
         should_match = bool(status == V1Statuses.STARTING)
         assert LifeCycle.is_starting(status) is should_match
 def test_failed(self):
     """Check `failed` against None and every allowable status."""
     assert LifeCycle.failed(None) is False
     failure_statuses = {V1Statuses.FAILED, V1Statuses.UPSTREAM_FAILED}
     for status in V1Statuses.allowable_values:
         should_match = status in failure_statuses
         assert LifeCycle.failed(status) is should_match
 def test_skipped(self):
     """Check `skipped` against None and every allowable status."""
     assert LifeCycle.skipped(None) is False
     for status in V1Statuses.allowable_values:
         should_match = bool(status == V1Statuses.SKIPPED)
         assert LifeCycle.skipped(status) is should_match
 def test_is_done(self):
     """Check `is_done` against None and every allowable status."""
     assert LifeCycle.is_done(None) is False
     for status in V1Statuses.allowable_values:
         should_match = status in LifeCycle.DONE_VALUES
         assert LifeCycle.is_done(status) is should_match
 def test_succeeded(self):
     """Check `succeeded` against None and every allowable status."""
     assert LifeCycle.succeeded(None) is False
     for status in V1Statuses.allowable_values:
         should_match = bool(status == V1Statuses.SUCCEEDED)
         assert LifeCycle.succeeded(status) is should_match
 def test_is_stopping(self):
     """Check `is_stopping` against None and every allowable status.

     Named with the `test_` prefix so that default pytest discovery collects
     it; without the prefix these assertions never ran.
     """
     assert LifeCycle.is_stopping(None) is False
     for status in V1Statuses.allowable_values:
         should_match = bool(status == V1Statuses.STOPPING)
         assert LifeCycle.is_stopping(status) is should_match
 def test_is_stoppable(self):
     """Check `is_stoppable` against None and every allowable status."""
     assert LifeCycle.is_stoppable(None) is True
     for status in V1Statuses.allowable_values:
         # Stoppable is expected to be the exact negation of done.
         should_match = not LifeCycle.is_done(status)
         assert LifeCycle.is_stoppable(status) is should_match
 def test_is_unknown(self):
     """Check `is_unknown` against None and every allowable status."""
     assert LifeCycle.is_unknown(None) is False
     for status in V1Statuses.allowable_values:
         should_match = bool(status == V1Statuses.UNKNOWN)
         assert LifeCycle.is_unknown(status) is should_match
 def test_is_pending(self):
     """Check `is_pending` against None and every allowable status."""
     assert LifeCycle.is_pending(None) is False
     for status in V1Statuses.allowable_values:
         should_match = status in LifeCycle.PENDING_VALUES
         assert LifeCycle.is_pending(status) is should_match
Exemple #25
0
def new_status(
    entity, condition: V1StatusCondition, additional_fields: List[str] = None
):
    """Apply `condition` to `entity` and save its status-related fields.

    Nothing is changed or saved when the condition type is CREATED, or when
    the entity is currently STOPPING and the condition is not a done status.

    Args:
        entity: object exposing `status` and `save(update_fields=...)`.
        condition: the status condition to apply.
        additional_fields: extra field names to include in the save, placed
            before the status-related ones.

    Returns:
        The status the entity had when this function was called.
    """
    status_before = entity.status
    if condition.type == V1Statuses.CREATED:
        return status_before

    # While stopping, only a done status may replace the current one.
    currently_stopping = status_before == V1Statuses.STOPPING
    if currently_stopping and not LifeCycle.is_done(condition.type):
        return status_before

    entity = set_entity_status(entity=entity, condition=condition)
    LifeCycle.set_started_at(entity=entity)
    LifeCycle.set_finished_at(entity=entity)

    status_fields = [
        "status_conditions",
        "status",
        "started_at",
        "updated_at",
        "finished_at",
        "wait_time",
        "duration",
    ]
    entity.save(update_fields=(additional_fields or []) + status_fields)
    return status_before
Exemple #26
0
def runs_stop(run_id: int,
              run: Optional[BaseRun],
              update_status=False,
              message=None) -> bool:
    """Stop a run through the manager and optionally record the stop status.

    The manager is only asked to stop managed service/job runs whose status
    is k8s-stoppable or STOPPING, and only when the in-cluster setting is
    truthy.

    Returns:
        True when the run could not be resolved, or when no stop was needed
        or the stop succeeded; False when the manager reports a falsy result.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return True

    needs_stop = (LifeCycle.is_k8s_stoppable(run.status)
                  or run.status == V1Statuses.STOPPING)
    if run.is_managed and needs_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            stop_result = manager.stop(
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
            )
            if not stop_result:
                return False

    # The status is only recorded when the caller asked for it.
    if update_status:
        new_run_stop_status(run=run, message=message)
    return True
Exemple #27
0
def new_run_status(run: BaseRun, condition: V1StatusCondition):
    """Apply `condition` to `run` and record the matching audit events.

    No event is recorded when the condition type is CREATED or STOPPING, or
    when the run's status is unchanged after applying the condition.
    Otherwise a RUN_NEW_STATUS event is recorded, followed by at most one
    status-specific event, and a RUN_DONE event if the new status is done.
    """
    previous_status = new_status(entity=run, condition=condition)

    # Do not audit the new status since it's the same as the previous one
    if condition.type in {V1Statuses.CREATED, V1Statuses.STOPPING}:
        return
    if previous_status == run.status:
        return

    def _audit(event_type):
        auditor.record(event_type=event_type,
                       instance=run,
                       previous_status=previous_status)

    _audit(RUN_NEW_STATUS)

    # Checked in order; the first matching status wins.
    status_events = (
        (V1Statuses.STOPPED, RUN_STOPPED),
        (V1Statuses.FAILED, RUN_FAILED),
        (V1Statuses.SUCCEEDED, RUN_SUCCEEDED),
        (V1Statuses.SKIPPED, RUN_SKIPPED),
    )
    for status, event_type in status_events:
        if run.status == status:
            _audit(event_type)
            break
    else:
        # The resumed event is recorded without a previous status.
        if run.status == V1Statuses.RESUMING:
            auditor.record(event_type=RUN_RESUMED, instance=run)

    # handle done status
    if LifeCycle.is_done(run.status):
        _audit(RUN_DONE)
Exemple #28
0
def new_run_stopping_status(run, message) -> bool:
    """Move `run` towards a stop, either directly or via a STOPPING status.

    Returns:
        False when the run's status is already done (checked with
        `progressing=True`); True otherwise, after recording the stop status
        for a safe-stoppable run or a STOPPING condition for any other run.
    """
    if LifeCycle.is_done(run.status, progressing=True):
        return False

    # A safe-stoppable run gets its stop status set right away.
    if LifeCycle.is_safe_stoppable(run.status):
        new_run_stop_status(run, message)
        return True

    if message:
        stopping_message = f"Run is stopping; {message}"
    else:
        stopping_message = "Run is stopping"

    new_run_status(
        run=run,
        condition=V1StatusCondition.get_condition(
            type=V1Statuses.STOPPING,
            status="True",
            reason="StateManager",
            message=stopping_message,
        ),
    )
    return True
Exemple #29
0
def runs_start(run_id: int, run: Optional[BaseRun]):
    """Mark a compiled, managed run as QUEUED and start it via the manager.

    Returns without doing anything when the run cannot be resolved or is not
    managed, and logs an info message when its status is not compiled. The
    manager is only called for service/job runs when the in-cluster setting
    is truthy. Any exception raised while starting is turned into a FAILED
    status condition whose message carries the error and traceback.
    """
    run = get_run(run_id=run_id, run=run)
    if not run or not run.is_managed:
        return

    if not LifeCycle.is_compiled(run.status):
        _logger.info(
            "Run `%s` cannot transition from `%s` to `%s`.",
            run_id,
            run.status,
            V1Statuses.QUEUED,
        )
        return

    new_run_status(
        run=run,
        condition=V1StatusCondition.get_condition(
            type=V1Statuses.QUEUED,
            status="True",
            reason="PolyaxonRunQueued",
            message="Run is queued",
        ),
    )

    def _mark_failed(exc: Exception, message: str = None):
        # Must be called from an `except` block so the traceback is available.
        prefix = message or "Could not start the operation.\n"
        details = "error: {}\n{}".format(repr(exc), traceback.format_exc())
        failed_condition = V1StatusCondition.get_condition(
            type=V1Statuses.FAILED,
            status="True",
            reason="PolyaxonRunFailed",
            message=prefix + details,
        )
        new_run_status(run=run, condition=failed_condition)

    try:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            manager.start(
                content=run.content,
                owner_name=run.project.owner.name,
                project_name=run.project.name,
                run_name=run.name,
                run_uuid=run.uuid.hex,
                run_kind=run.kind,
                namespace=conf.get(K8S_NAMESPACE),
                in_cluster=in_cluster,
                default_auth=False,
            )
    except (PolyaxonK8SError, ApiException) as k8s_error:
        _mark_failed(
            exc=k8s_error,
            message="Kubernetes manager could not start the operation.\n")
    except PolypodException as manifest_error:
        _mark_failed(exc=manifest_error,
                     message="Failed converting the run manifest.\n")
    except Exception as unknown_error:
        # Catch-all: any other failure is reported on the run as FAILED.
        _mark_failed(exc=unknown_error,
                     message="Failed with unknown exception.\n")
Exemple #30
0
def runs_stop(
    run_id: int,
    run: Optional[BaseRun],
    update_status=False,
    message=None,
    clean=False,
) -> bool:
    """Stop a run through the manager and optionally record the stop status.

    The manager is only called for managed service/job runs whose status is
    k8s-stoppable or STOPPING, and only when the in-cluster setting is truthy.

    Args:
        run_id: id used to resolve the run when `run` is not provided.
        run: an already-loaded run, if available.
        update_status: when true, record the stop status after a successful
            (or unnecessary) stop.
        message: message forwarded to the stop status.
        clean: when true, ask the manager to clean the run before stopping.

    Returns:
        True when the run could not be resolved, or when no stop was needed
        or the stop succeeded; False when the manager raises a known
        Kubernetes error or reports a falsy result.
    """
    run = get_run(run_id=run_id, run=run)
    if not run:
        return True

    should_stop = (LifeCycle.is_k8s_stoppable(run.status)
                   or run.status == V1Statuses.STOPPING)

    if run.is_managed and should_stop:
        in_cluster = conf.get(K8S_IN_CLUSTER)
        if in_cluster and (run.is_service or run.is_job):
            if clean:
                # Cleanup is best-effort: a failure is logged and the stop
                # below is still attempted, as before.
                try:
                    manager.clean(
                        run_uuid=run.uuid.hex,
                        run_kind=run.kind,
                        namespace=conf.get(K8S_NAMESPACE),
                        in_cluster=in_cluster,
                    )
                except (PolyaxonK8SError, ApiException) as e:
                    _logger.warning(
                        "Something went wrong, the run `%s` could not be cleaned, error %s",
                        run.uuid,
                        e,
                    )
            try:
                stopped = manager.stop(
                    run_uuid=run.uuid.hex,
                    run_kind=run.kind,
                    namespace=conf.get(K8S_NAMESPACE),
                    in_cluster=in_cluster,
                )
            except (PolyaxonK8SError, ApiException) as e:
                _logger.warning(
                    "Something went wrong, the run `%s` could not be stopped, error %s",
                    run.uuid,
                    e,
                )
                return False
            if not stopped:
                return False

    # The status is only recorded when the caller asked for it.
    if update_status:
        new_run_stop_status(run=run, message=message)
    return True