def get(self, request, *args, **kwargs):
    """Stream the experiment's log file as a download."""
    auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                   instance=self.experiment,
                   actor_id=request.user.id,
                   actor_name=request.user.username)
    experiment_name = self.experiment.unique_name
    if self.experiment.is_done:
        # Finished experiment: archive the persisted log file and serve the archive.
        log_path = stores.get_experiment_logs_path(
            experiment_name=experiment_name, temp=False)
        log_path = archive_logs_file(log_path=log_path, namepath=experiment_name)
    elif self.experiment.run_env and self.experiment.run_env.get('in_cluster'):
        # Still running in-cluster: collect the logs into the temporary file first.
        process_logs(experiment=self.experiment, temp=True)
        log_path = stores.get_experiment_logs_path(
            experiment_name=experiment_name, temp=True)
    else:
        return Response(status=status.HTTP_404_NOT_FOUND,
                        data='Experiment is still running, no logs.')

    filename = os.path.basename(log_path)
    chunk_size = 8192
    try:
        wrapped_file = FileWrapper(open(log_path, 'rb'), chunk_size)
        response = StreamingHttpResponse(
            wrapped_file,
            content_type=mimetypes.guess_type(log_path)[0])
        response['Content-Length'] = os.path.getsize(log_path)
        response['Content-Disposition'] = "attachment; filename={}".format(filename)
        return response
    except FileNotFoundError:
        _logger.warning('Log file not found: log_path=%s', log_path)
        return Response(
            status=status.HTTP_404_NOT_FOUND,
            data='Log file not found: log_path={}'.format(log_path))
def test_experiment_logs_path_creation_deletion(self):
    stores.create_experiment_logs_path(
        experiment_name=self.experiment.unique_name, temp=False)
    experiment_logs_path = stores.get_experiment_logs_path(
        experiment_name=self.experiment.unique_name, temp=False)
    filepath = stores.get_experiment_logs_path(
        experiment_name=self.experiment.unique_name, temp=False)
    open(filepath, '+w')
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(filepath) is True
    stores_schedule_logs_deletion(persistence=None, subpath=self.experiment.subpath)
    assert os.path.exists(filepath) is False
def get_experiment_logs_path(experiment: Experiment) -> Optional[str]:
    experiment_name = experiment.unique_name
    if experiment.is_done:
        log_path = stores.get_experiment_logs_path(
            experiment_name=experiment_name, temp=False)
        logs_path = archive_logs_file(log_path=log_path, namepath=experiment_name)
    elif experiment.is_managed:
        process_logs(experiment=experiment, temp=True)
        logs_path = stores.get_experiment_logs_path(
            experiment_name=experiment_name, temp=True)
    else:
        # Normally it should load from the tmp file where we append the logs
        return None
    return logs_path
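# A minimal sketch (not the project's actual view) of how a download view such as
# the one shown earlier could delegate the path-resolution branching to the helper
# above and fall back to 404 when it returns None. It assumes it lives in the same
# view class and that the same imports (os, mimetypes, FileWrapper,
# StreamingHttpResponse, Response, status) are available.
def get(self, request, *args, **kwargs):
    log_path = get_experiment_logs_path(experiment=self.experiment)
    if log_path is None:
        return Response(status=status.HTTP_404_NOT_FOUND,
                        data='Experiment is still running, no logs.')
    filename = os.path.basename(log_path)
    wrapped_file = FileWrapper(open(log_path, 'rb'), 8192)
    response = StreamingHttpResponse(
        wrapped_file, content_type=mimetypes.guess_type(log_path)[0])
    response['Content-Length'] = os.path.getsize(log_path)
    response['Content-Disposition'] = "attachment; filename={}".format(filename)
    return response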
def get_config_map(namespace,
                   project_name,
                   experiment_group_name,
                   experiment_name,
                   project_uuid,
                   experiment_group_uuid,
                   experiment_uuid,
                   original_name,
                   cloning_strategy,
                   cluster_def,
                   persistence_outputs,
                   persistence_data,
                   params,
                   log_level):
    name = constants.CONFIG_MAP_NAME.format(uuid=experiment_uuid)
    labels = get_map_labels(project_name,
                            experiment_group_name,
                            experiment_name,
                            project_uuid,
                            experiment_group_uuid,
                            experiment_uuid)
    metadata = client.V1ObjectMeta(name=name, labels=labels, namespace=namespace)
    experiment_outputs_path = stores.get_experiment_outputs_path(
        persistence=persistence_outputs,
        experiment_name=experiment_name,
        original_name=original_name,
        cloning_strategy=cloning_strategy)
    experiment_logs_path = stores.get_experiment_logs_path(
        experiment_name=experiment_name, temp=False)
    data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_PARAMS_KEY_NAME: json.dumps(params) or '{}',
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        constants.CONFIG_MAP_RUN_OUTPUTS_PATH_KEY_NAME: experiment_outputs_path,
        constants.CONFIG_MAP_RUN_LOGS_PATH_KEY_NAME: experiment_logs_path,
        constants.CONFIG_MAP_RUN_DATA_PATHS_KEY_NAME: persistence_data,
        API_HTTP_URL: get_settings_http_api_url(),
        API_WS_HOST: get_settings_ws_api_url(),
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=data)
def safe_log_experiment(experiment_name, log_lines, temp, append=False):
    log_path = stores.get_experiment_logs_path(
        experiment_name=experiment_name, temp=temp)
    try:
        stores.create_experiment_logs_path(experiment_name=experiment_name, temp=temp)
        _lock_log(log_path, log_lines, append=append)
    except OSError:
        # Retry
        stores.create_experiment_logs_path(experiment_name=experiment_name, temp=temp)
        _lock_log(log_path, log_lines, append=append)
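# A hypothetical usage sketch of `safe_log_experiment`: appending a batch of log
# lines to the temporary log file of a running experiment. The experiment name and
# lines are illustrative only, and whether `_lock_log` expects a list of strings or
# a single pre-joined string is not visible in these snippets.
safe_log_experiment(experiment_name='root.project.1',
                    log_lines=['step 1: loss=0.52', 'step 2: loss=0.47'],
                    temp=True,
                    append=True)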
def test_experiment_group_logs_path_creation_deletion(self):
    experiment = ExperimentFactory(user=self.project.user,
                                   project=self.project,
                                   experiment_group=self.experiment_group)
    experiment_logs_path = stores.get_experiment_logs_path(
        experiment_name=experiment.unique_name, temp=False)
    stores.create_experiment_logs_path(experiment_name=experiment.unique_name, temp=False)
    open(experiment_logs_path, '+w')
    experiment_group_logs_path = stores.get_experiment_group_logs_path(
        experiment_group_name=self.experiment_group.unique_name)
    # Should be true, created by the signal
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(experiment_group_logs_path) is True
    stores_schedule_logs_deletion(persistence=None, subpath=self.experiment_group.subpath)
    assert os.path.exists(experiment_logs_path) is False
    assert os.path.exists(experiment_group_logs_path) is False
def test_project_logs_path_creation_deletion(self):
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
        experiment = ExperimentFactory(user=self.project.user, project=self.project)
    experiment_logs_path = stores.get_experiment_logs_path(
        experiment_name=experiment.unique_name, temp=False)
    stores.create_experiment_logs_path(
        experiment_name=experiment.unique_name, temp=False)
    open(experiment_logs_path, '+w')
    project_logs_path = stores.get_project_logs_path(
        project_name=self.project.unique_name)
    project_repos_path = get_project_repos_path(self.project.unique_name)
    # Should be true, created by the signal
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(project_logs_path) is True
    assert os.path.exists(project_repos_path) is True
    stores_schedule_logs_deletion(persistence=None, subpath=self.project.subpath)
    delete_project_repos(self.project.unique_name)
    assert os.path.exists(experiment_logs_path) is False
    assert os.path.exists(project_logs_path) is False
    assert os.path.exists(project_repos_path) is False
def _get_logs_path(self, persistence_logs='default'):
    return stores.get_experiment_logs_path(
        persistence=persistence_logs,
        experiment_name=self.experiment_name,
        temp=False)
def get_log_path(instance):
    return stores.get_experiment_logs_path(
        experiment_name=instance.unique_name, temp=False)
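# Hypothetical usage sketch: resolving the persistent (non-temp) log file for an
# experiment instance, e.g. before archiving or streaming it. `experiment` stands
# in for any object exposing `unique_name`, as the functions above assume, and
# `os` is expected to be imported as in the surrounding modules.
log_path = get_log_path(instance=experiment)
if os.path.exists(log_path):
    with open(log_path) as log_file:
        last_lines = log_file.readlines()[-100:]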
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      secret_refs=None,
                      configmap_refs=None,
                      resources=None,
                      ephemeral_token=None):
    """Pod job container for task."""
    assert self.cluster_def is not None

    # Env vars preparations
    env_vars = to_list(env_vars, check_none=True)
    logs_path = stores.get_experiment_logs_path(
        experiment_name=self.experiment_name, temp=False)
    outputs_path = stores.get_experiment_outputs_path(
        persistence=persistence_outputs,
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    env_vars += get_job_env_vars(
        persistence_outputs=persistence_outputs,
        outputs_path=outputs_path,
        persistence_data=persistence_data,
        log_level=self.log_level,
        logs_path=logs_path,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        ephemeral_token=ephemeral_token,
    )
    env_vars += [
        get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                    value=json.dumps(self.cluster_def)),
        get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                    value=self.declarations),
        get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                    value=json.dumps(self.experiment_labels)),
    ]
    env_vars += get_resources_env_vars(resources=resources)

    # Env from configmap and secret refs
    env_from = get_pod_env_from(secret_refs=secret_refs, configmap_refs=configmap_refs)

    ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=ports,
                              env=env_vars,
                              env_from=env_from,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)