Ejemplo n.º 1
0
    def get(self, request, *args, **kwargs):
        """Stream the experiment's log file to the caller as an attachment.

        Records an audit event, resolves the log file (archived for finished
        experiments, temp for in-cluster runs), and streams it back; returns
        404 when no logs are available yet or the file is missing.
        """
        auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                       instance=self.experiment,
                       actor_id=request.user.id,
                       actor_name=request.user.username)
        name = self.experiment.unique_name
        run_env = self.experiment.run_env
        if self.experiment.is_done:
            # Finished experiment: serve the archived copy of its logs.
            raw_path = stores.get_experiment_logs_path(experiment_name=name,
                                                       temp=False)
            log_path = archive_logs_file(log_path=raw_path, namepath=name)
        elif run_env and run_env.get('in_cluster'):
            # In-cluster run: materialize the temp log file before serving it.
            process_logs(experiment=self.experiment, temp=True)
            log_path = stores.get_experiment_logs_path(experiment_name=name,
                                                       temp=True)
        else:
            return Response(status=status.HTTP_404_NOT_FOUND,
                            data='Experiment is still running, no logs.')

        filename = os.path.basename(log_path)
        chunk_size = 8192
        try:
            stream = FileWrapper(open(log_path, 'rb'), chunk_size)
            response = StreamingHttpResponse(
                stream,
                content_type=mimetypes.guess_type(log_path)[0])
            response['Content-Length'] = os.path.getsize(log_path)
            response['Content-Disposition'] = "attachment; filename={}".format(
                filename)
            return response
        except FileNotFoundError:
            _logger.warning('Log file not found: log_path=%s', log_path)
            return Response(
                status=status.HTTP_404_NOT_FOUND,
                data='Log file not found: log_path={}'.format(log_path))
Ejemplo n.º 2
0
 def test_experiment_logs_path_creation_deletion(self):
     """Logs path lifecycle: create it, touch a file, then schedule deletion."""
     stores.create_experiment_logs_path(
         experiment_name=self.experiment.unique_name, temp=False)
     experiment_logs_path = stores.get_experiment_logs_path(
         experiment_name=self.experiment.unique_name, temp=False)
     # Same call with identical args: both names resolve to the same path.
     filepath = stores.get_experiment_logs_path(
         experiment_name=self.experiment.unique_name, temp=False)
     # Touch the file and close the handle right away (the original left the
     # file object from a bare open() unclosed).
     with open(filepath, 'w+'):
         pass
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(filepath) is True
     stores_schedule_logs_deletion(persistence=None,
                                   subpath=self.experiment.subpath)
     assert os.path.exists(filepath) is False
Ejemplo n.º 3
0
def get_experiment_logs_path(experiment: Experiment) -> Optional[str]:
    """Resolve the logs file path for *experiment*, or ``None`` if unavailable.

    Finished experiments resolve to an archived log file; managed experiments
    have their pending logs processed into the temp file first.
    """
    name = experiment.unique_name
    if experiment.is_done:
        raw_path = stores.get_experiment_logs_path(experiment_name=name,
                                                   temp=False)
        return archive_logs_file(log_path=raw_path, namepath=name)
    if experiment.is_managed:
        process_logs(experiment=experiment, temp=True)
        return stores.get_experiment_logs_path(experiment_name=name, temp=True)
    return None
Ejemplo n.º 4
0
def get_experiment_logs_path(experiment: Experiment) -> Optional[str]:
    """Return the logs path for *experiment* (archived or temp), else ``None``."""
    if experiment.is_done:
        # Completed runs serve the archived log file.
        stored_path = stores.get_experiment_logs_path(
            experiment_name=experiment.unique_name, temp=False)
        result = archive_logs_file(log_path=stored_path,
                                   namepath=experiment.unique_name)
    elif experiment.is_managed:
        # Managed runs: flush pending logs into the temp file, then serve it.
        process_logs(experiment=experiment, temp=True)
        result = stores.get_experiment_logs_path(
            experiment_name=experiment.unique_name, temp=True)
    else:
        # Normally it should load from the tmp file where we append the logs
        result = None
    return result
Ejemplo n.º 5
0
def get_config_map(namespace, project_name, experiment_group_name,
                   experiment_name, project_uuid, experiment_group_uuid,
                   experiment_uuid, original_name, cloning_strategy,
                   cluster_def, persistence_outputs, persistence_data, params,
                   log_level):
    """Assemble the k8s ``V1ConfigMap`` carrying run metadata for an experiment.

    The config map bundles the cluster definition, run params, experiment
    labels, log level, outputs/logs/data paths, and the API endpoints.
    """
    labels = get_map_labels(project_name, experiment_group_name,
                            experiment_name, project_uuid,
                            experiment_group_uuid, experiment_uuid)
    metadata = client.V1ObjectMeta(
        name=constants.CONFIG_MAP_NAME.format(uuid=experiment_uuid),
        labels=labels,
        namespace=namespace)
    outputs_path = stores.get_experiment_outputs_path(
        persistence=persistence_outputs,
        experiment_name=experiment_name,
        original_name=original_name,
        cloning_strategy=cloning_strategy)
    logs_path = stores.get_experiment_logs_path(
        experiment_name=experiment_name, temp=False)
    config_data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        # json.dumps(None) is 'null' (truthy), so the fallback only triggers
        # for params values that serialize to an empty/falsy string.
        constants.CONFIG_MAP_PARAMS_KEY_NAME: json.dumps(params) or '{}',
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        constants.CONFIG_MAP_RUN_OUTPUTS_PATH_KEY_NAME: outputs_path,
        constants.CONFIG_MAP_RUN_LOGS_PATH_KEY_NAME: logs_path,
        constants.CONFIG_MAP_RUN_DATA_PATHS_KEY_NAME: persistence_data,
        API_HTTP_URL: get_settings_http_api_url(),
        API_WS_HOST: get_settings_ws_api_url(),
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=config_data)
Ejemplo n.º 6
0
def safe_log_experiment(experiment_name, log_lines, temp, append=False):
    """Persist *log_lines* for an experiment, creating the logs path if needed.

    On an ``OSError`` (e.g. the path disappeared concurrently) the path is
    re-created and the write is attempted exactly one more time.
    """
    log_path = stores.get_experiment_logs_path(experiment_name=experiment_name,
                                               temp=temp)

    def _create_and_write():
        # Ensure the directory/file structure exists before locking the file.
        stores.create_experiment_logs_path(experiment_name=experiment_name,
                                           temp=temp)
        _lock_log(log_path, log_lines, append=append)

    try:
        _create_and_write()
    except OSError:
        # Retry
        _create_and_write()
 def test_experiment_group_logs_path_creation_deletion(self):
     """Deleting the group's logs removes experiment and group log paths."""
     experiment = ExperimentFactory(user=self.project.user,
                                    project=self.project,
                                    experiment_group=self.experiment_group)
     experiment_logs_path = stores.get_experiment_logs_path(
         experiment_name=experiment.unique_name,
         temp=False)
     stores.create_experiment_logs_path(experiment_name=experiment.unique_name, temp=False)
     # Touch the log file and close the handle right away (the original left
     # the file object from a bare open() unclosed).
     with open(experiment_logs_path, 'w+'):
         pass
     experiment_group_logs_path = stores.get_experiment_group_logs_path(
         experiment_group_name=self.experiment_group.unique_name)
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(experiment_group_logs_path) is True
     stores_schedule_logs_deletion(persistence=None, subpath=self.experiment_group.subpath)
     assert os.path.exists(experiment_logs_path) is False
     assert os.path.exists(experiment_group_logs_path) is False
Ejemplo n.º 8
0
 def test_project_logs_path_creation_deletion(self):
     """Deleting the project's logs and repos removes every nested path."""
     with patch('scheduler.tasks.experiments.experiments_build.apply_async'
                ) as _:  # noqa
         experiment = ExperimentFactory(user=self.project.user,
                                        project=self.project)
     experiment_logs_path = stores.get_experiment_logs_path(
         experiment_name=experiment.unique_name, temp=False)
     stores.create_experiment_logs_path(
         experiment_name=experiment.unique_name, temp=False)
     # Touch the log file and close the handle right away (the original left
     # the file object from a bare open() unclosed).
     with open(experiment_logs_path, 'w+'):
         pass
     project_logs_path = stores.get_project_logs_path(
         project_name=self.project.unique_name)
     project_repos_path = get_project_repos_path(self.project.unique_name)
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(project_logs_path) is True
     assert os.path.exists(project_repos_path) is True
     stores_schedule_logs_deletion(persistence=None,
                                   subpath=self.project.subpath)
     delete_project_repos(self.project.unique_name)
     assert os.path.exists(experiment_logs_path) is False
     assert os.path.exists(project_logs_path) is False
     assert os.path.exists(project_repos_path) is False
Ejemplo n.º 9
0
 def _get_logs_path(self, persistence_logs='default'):
     """Return the permanent (non-temp) logs path for this experiment."""
     return stores.get_experiment_logs_path(persistence=persistence_logs,
                                            experiment_name=self.experiment_name,
                                            temp=False)
Ejemplo n.º 10
0
 def get_log_path(instance):
     """Resolve the permanent (non-temp) logs path for *instance*."""
     return stores.get_experiment_logs_path(experiment_name=instance.unique_name,
                                            temp=False)
Ejemplo n.º 11
0
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          resources=None,
                          ephemeral_token=None):
        """Build the k8s ``V1Container`` for this task's job pod.

        Gathers job env vars (outputs/logs paths, refs, log level, ephemeral
        token), the cluster definition, declarations and experiment labels,
        plus resource-derived env vars; wires env-from refs for secrets and
        configmaps, exposes ``self.ports``, and assembles the container.
        Requires ``self.cluster_def`` to be set.
        """
        assert self.cluster_def is not None

        # Env vars preparations
        # Caller-provided env vars come first; to_list normalizes None to [].
        env_vars = to_list(env_vars, check_none=True)
        # Resolve the permanent (non-temp) logs path for this experiment.
        logs_path = stores.get_experiment_logs_path(
            experiment_name=self.experiment_name, temp=False)
        # Outputs path accounts for cloned experiments via original_name and
        # cloning_strategy.
        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        env_vars += get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=outputs_path,
            persistence_data=persistence_data,
            log_level=self.log_level,
            logs_path=logs_path,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            ephemeral_token=ephemeral_token,
        )
        # Cluster definition and labels are JSON-encoded; declarations is
        # passed through as-is (presumably already a string — TODO confirm).
        env_vars += [
            get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                        value=json.dumps(self.cluster_def)),
            get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                        value=self.declarations),
            get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                        value=json.dumps(self.experiment_labels)),
        ]
        env_vars += get_resources_env_vars(resources=resources)

        # Env from configmap and secret refs
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        # One V1ContainerPort per port this spec exposes.
        ports = [
            client.V1ContainerPort(container_port=port) for port in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=ports,
                                  env=env_vars,
                                  env_from=env_from,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)