Example #1
0
def status(ctx: click.Context, username: str):
    """
    Returns status of a model

    :param username; if checked - searches for model for a certain user
    """
    try:
        workflows: List[ArgoWorkflow.ArgoWorkflowCliModel] = []
        if not username:
            namespace = get_kubectl_current_context_namespace()
        else:
            namespace = username
        with spinner(text=Texts.LOAD_DATA_MSG):
            # filtering out workflows used to build images with training jobs
            workflows = [
                workflow.cli_representation for workflow in ArgoWorkflow.list(
                    namespace=namespace, label_selector="type!=build-workflow")
            ]

        click.echo(
            tabulate(workflows,
                     headers=MODEL_HEADERS,
                     tablefmt=TBLT_TABLE_FORMAT))
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Example #2
0
def workflow_list(ctx: click.Context):
    try:
        namespace = get_kubectl_current_context_namespace()
        workflows: List[ArgoWorkflow] = ArgoWorkflow.list(namespace=namespace)
        click.echo(
            tabulate([workflow.cli_representation for workflow in workflows],
                     headers=HEADERS,
                     tablefmt=TBLT_TABLE_FORMAT))
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Example #3
0
def workflow_list(state: State):
    try:
        namespace = get_kubectl_current_context_namespace()
        workflows = ArgoWorkflow.list(namespace=namespace)
        click.echo(
            tabulate([workflow.cli_representation for workflow in workflows],
                     headers=HEADERS,
                     tablefmt="orgtbl"))
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Example #4
0
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
       Purge experiment with a given name by cancelling runs given as a parameter. If given experiment
       contains more runs than is in the list of runs - experiment's state remains intact.

       :param exp_name: name of an experiment to which belong runs passed in run_list parameter
       :param runs_to_purge: list of runs that should be purged, they have to belong to exp_name experiment
       :param k8s_es_client: Kubernetes ElasticSearch client
       :param namespace: namespace where experiment is located
       :return: two list - first contains runs that were cancelled successfully, second - those which weren't
       """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # check whether experiment has more runs that should be cancelled
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name,
                    username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name,
                                        namespace=namespace,
                                        purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurence of NotFound error may mean, that run has been removed earlier
                if "NotFound" not in str(exe):
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=experiment_name))
                    raise exe
            try:
                # clear run logs
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name,
                                                          namespace=namespace)
            except Exception:
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            #    delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #    logger.exception("Error during removing images.")

        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
Example #5
0
def get_logs(operation_name: str, start_date: str, end_date: str, match: str,
             output: bool, pager: bool, follow: bool):
    """
    Show logs for a given model export operation.
    """
    # check whether we have operations with a given name
    if operation_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)
    elif not operation_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1",
                                               port=proxy.tunnel_port,
                                               verify_certs=False,
                                               use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                operation_name = match
                name_filter = match
            else:
                name_filter = f'^{operation_name}$'
            workflows = ArgoWorkflow.list(namespace=namespace,
                                          name_filter=name_filter)
            if not workflows:
                raise ValueError(
                    f'Operation with given name: {operation_name} does not '
                    f'exists in namespace {namespace}.')

            follow_logs = True if follow and not output else False

            if output and len(workflows) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for workflow in workflows:
                start_date = start_date if start_date else workflow.started_at

                ops_logs_generator = es_client.get_argo_workflow_logs_generator(
                    workflow=workflow,
                    namespace=namespace,
                    start_date=start_date,
                    end_date=end_date,
                    follow=follow_logs)

                if output:
                    save_logs_to_file(logs_generator=ops_logs_generator,
                                      instance_name=workflow.name,
                                      instance_type="operation")
                else:
                    if len(workflows) > 1:
                        click.echo(f'Operation : {workflow.name}')
                    print_logs(run_logs_generator=ops_logs_generator,
                               pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG,
                     Texts.PROXY_CLOSE_LOG_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(
            logger,
            Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
                exception_message=exe.message),
            Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
                exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG,
                     Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        handle_error(
            logger,
            Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
                operation_name=operation_name),
            Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=operation_name))
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG,
                     Texts.LOGS_GET_OTHER_ERROR_MSG)
        exit(1)