Ejemplo n.º 1
0
async def experiment_job_logs_v2(request, ws, username, project_name,
                                 experiment_id, job_id):
    """Stream the logs of a single experiment job over the websocket ``ws``.

    The job and its experiment are resolved with ``validate_experiment_job``.
    When no job is returned, the accompanying message is sent to the client
    wrapped by ``get_error_message`` and the handler stops. Otherwise an
    ``EXPERIMENT_JOB_LOGS_VIEWED`` event is recorded for the requesting user
    and the streaming itself is delegated to ``log_job``.
    """
    job, experiment, message = validate_experiment_job(
        request=request,
        username=username,
        project_name=project_name,
        experiment_id=experiment_id,
        job_id=job_id,
    )

    # Guard clause: report the validation message and bail out.
    if job is None:
        await ws.send(get_error_message(message))
        return

    target_pod = job.pod_id
    target_container = get_experiment_job_container_name(
        backend=experiment.backend,
        framework=experiment.framework,
    )

    # Keep an audit trail of who looked at this job's logs.
    viewer = request.app.user
    auditor.record(
        event_type=EXPERIMENT_JOB_LOGS_VIEWED,
        instance=job,
        actor_id=viewer.id,
        actor_name=viewer.username,
    )

    # Hand over to the streaming coroutine.
    k8s_namespace = conf.get('K8S_NAMESPACE')
    await log_job(
        request=request,
        ws=ws,
        job=job,
        pod_id=target_pod,
        container=target_container,
        namespace=k8s_namespace,
    )
Ejemplo n.º 2
0
    def test_get_experiment_job_container_name(self):
        """Check which configured container name each backend/framework pair yields.

        Every row is ``(conf key, backend, framework)``: the value stored in
        ``conf`` under the key must equal what
        ``get_experiment_job_container_name`` returns for that pair.
        """
        generic_key = 'CONTAINER_NAME_EXPERIMENT_JOB'
        kubeflow = ExperimentBackend.KUBEFLOW
        expectations = (
            # Combinations expected to resolve to the generic container name.
            (generic_key, None, None),
            (generic_key, None, 'foo'),
            (generic_key, 'foo', 'foo'),
            (generic_key, 'foo', None),
            (generic_key, kubeflow, 'foo'),
            # Kubeflow combined with a known framework has a dedicated name.
            ('CONTAINER_NAME_TF_JOB', kubeflow, ExperimentFramework.TENSORFLOW),
            ('CONTAINER_NAME_PYTORCH_JOB', kubeflow, ExperimentFramework.PYTORCH),
        )

        for conf_key, backend, framework in expectations:
            assert conf.get(conf_key) == get_experiment_job_container_name(
                backend=backend,
                framework=framework,
            )
Ejemplo n.º 3
0
def stream_logs(experiment: 'Experiment') -> Iterable[str]:
    """Return the log stream produced by ``base.stream_logs`` for an experiment.

    The pod name is built from ``EXPERIMENT_JOB_NAME_FORMAT`` using the
    experiment's default job role, task index 0 and the experiment uuid hex.
    The container name is derived from the experiment's backend and framework.
    """
    pod_name = EXPERIMENT_JOB_NAME_FORMAT.format(
        task_type=experiment.default_job_role,
        task_idx=0,
        experiment_uuid=experiment.uuid.hex,
    )
    manager = K8SManager(namespace=conf.get('K8S_NAMESPACE'), in_cluster=True)
    container_name = get_experiment_job_container_name(
        backend=experiment.backend,
        framework=experiment.framework,
    )
    return base.stream_logs(
        k8s_manager=manager,
        pod_id=pod_name,
        container_job_name=container_name,
    )
Ejemplo n.º 4
0
async def experiment_logs_v2(request, ws, username, project_name, experiment_id):
    """Stream the logs of an experiment over the websocket ``ws``.

    The experiment is looked up with ``get_experiment``. When none is
    returned, the accompanying message is sent to the client wrapped by
    ``get_error_message`` and the handler stops. Otherwise streaming is
    delegated to ``log_experiment``.
    """
    experiment, message = get_experiment(experiment_id=experiment_id)

    # Guard clause: nothing to stream without an experiment.
    if experiment is None:
        await ws.send(get_error_message(message))
        return

    target_container = get_experiment_job_container_name(
        backend=experiment.backend,
        framework=experiment.framework,
    )

    # Hand over to the streaming coroutine.
    k8s_namespace = conf.get(K8S_NAMESPACE)
    await log_experiment(
        request=request,
        ws=ws,
        experiment=experiment,
        container=target_container,
        namespace=k8s_namespace,
    )
Ejemplo n.º 5
0
def process_logs(experiment: 'Experiment', temp: bool = True) -> None:
    """Collect an experiment's log lines and persist them.

    The lines come from ``base.process_logs`` for the pod named after the
    experiment's default job role at task index 0, and are then written with
    ``safe_log_experiment`` using ``append=False``.
    """
    pod_name = EXPERIMENT_JOB_NAME_FORMAT.format(
        task_type=experiment.default_job_role,
        task_idx=0,
        experiment_uuid=experiment.uuid.hex,
    )
    manager = K8SManager(namespace=conf.get('K8S_NAMESPACE'), in_cluster=True)
    container_name = get_experiment_job_container_name(
        backend=experiment.backend,
        framework=experiment.framework,
    )
    collected = base.process_logs(
        k8s_manager=manager,
        pod_id=pod_name,
        container_job_name=container_name,
    )

    safe_log_experiment(
        experiment_name=experiment.unique_name,
        log_lines=collected,
        temp=temp,
        append=False,
    )
Ejemplo n.º 6
0
def process_logs(experiment_job: 'ExperimentJob',
                 temp: bool = True,
                 k8s_manager: 'K8SManager' = None) -> None:
    """Collect an experiment job's log lines and persist them.

    The lines come from ``base.process_logs`` for the job's pod, tagged with
    the job's role and sequence, and are then written with
    ``safe_log_experiment_job`` using ``append=False``. When ``k8s_manager``
    is falsy, an in-cluster ``K8SManager`` is created for the configured
    namespace.
    """
    role = experiment_job.role
    sequence = experiment_job.sequence

    # Fall back to a fresh in-cluster manager when the caller gave none.
    k8s_manager = k8s_manager or K8SManager(namespace=conf.get(K8S_NAMESPACE),
                                            in_cluster=True)

    owner = experiment_job.experiment
    container_name = get_experiment_job_container_name(backend=owner.backend,
                                                       framework=owner.framework)
    collected = base.process_logs(
        k8s_manager=k8s_manager,
        pod_id=experiment_job.pod_id,
        container_job_name=container_name,
        task_type=role,
        task_idx=sequence,
    )

    safe_log_experiment_job(
        experiment_job_name=experiment_job.unique_name,
        log_lines=collected,
        temp=temp,
        append=False,
    )
Ejemplo n.º 7
0
async def experiment_logs_v2(request, ws, username, project_name, experiment_id):
    """Stream the logs of an experiment over the websocket ``ws``.

    The experiment is resolved with ``validate_experiment``. When none is
    returned, the accompanying message is sent to the client wrapped by
    ``get_error_message`` and the handler stops. Otherwise an
    ``EXPERIMENT_LOGS_VIEWED`` event is recorded for the requesting user and
    streaming is delegated to ``log_experiment``.
    """
    experiment, message = validate_experiment(
        request=request,
        username=username,
        project_name=project_name,
        experiment_id=experiment_id,
    )

    # Guard clause: report the validation message and bail out.
    if experiment is None:
        await ws.send(get_error_message(message))
        return

    # Keep an audit trail of who looked at this experiment's logs.
    viewer = request.app.user
    auditor.record(
        event_type=EXPERIMENT_LOGS_VIEWED,
        instance=experiment,
        actor_id=viewer.id,
        actor_name=viewer.username,
    )

    target_container = get_experiment_job_container_name(
        backend=experiment.backend,
        framework=experiment.framework,
    )

    # Hand over to the streaming coroutine.
    k8s_namespace = conf.get(K8S_NAMESPACE)
    await log_experiment(
        request=request,
        ws=ws,
        experiment=experiment,
        container=target_container,
        namespace=k8s_namespace,
    )