예제 #1
0
    def get(self, request, *args, **kwargs):
        """Stream the experiment's log file back as an attachment.

        Records an ``EXPERIMENT_LOGS_VIEWED`` audit event, then picks the log
        path: the non-temp path when ``self.experiment.is_done``, otherwise
        ``process_logs`` is called with ``temp=True`` and the temp path is
        used.

        Returns:
            A ``StreamingHttpResponse`` that streams the file in 8192-byte
            chunks, or a 404 ``Response`` when the log file does not exist.
        """
        auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                       instance=self.experiment,
                       actor_id=request.user.id,
                       actor_name=request.user.username)
        experiment_name = self.experiment.unique_name
        if self.experiment.is_done:
            log_path = get_experiment_logs_path(experiment_name, temp=False)
        else:
            process_logs(experiment=self.experiment, temp=True)
            log_path = get_experiment_logs_path(
                experiment_name=experiment_name, temp=True)

        filename = os.path.basename(log_path)
        chunk_size = 8192
        try:
            log_file = open(log_path, 'rb')
        except FileNotFoundError:
            _logger.warning('Log file not found: log_path=%s', log_path)
            return Response(
                status=status.HTTP_404_NOT_FOUND,
                data='Log file not found: log_path={}'.format(log_path))

        try:
            response = StreamingHttpResponse(
                FileWrapper(log_file, chunk_size),
                content_type=mimetypes.guess_type(log_path)[0])
            # Size the response from the handle we actually stream, so a
            # concurrent rename/removal of the path cannot cause a mismatch.
            response['Content-Length'] = os.fstat(log_file.fileno()).st_size
            response['Content-Disposition'] = "attachment; filename={}".format(
                filename)
        except Exception:
            # The response never took ownership of the file; do not leak it.
            log_file.close()
            raise
        return response
예제 #2
0
 def test_experiment_logs_path_creation_deletion(self):
     """Check that an experiment's log file can be created and then deleted."""
     create_experiment_logs_path(self.experiment.unique_name)
     experiment_logs_path = get_experiment_logs_path(self.experiment.unique_name)
     filepath = get_experiment_logs_path(self.experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(filepath, '+w'):
         pass
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(filepath) is True
     delete_experiment_logs(self.experiment.unique_name)
     assert os.path.exists(filepath) is False
예제 #3
0
 def test_experiment_logs_path_creation_deletion(self):
     """Check that an experiment's log file can be created and then deleted."""
     experiment_logs_path = get_experiment_logs_path(
         self.experiment.unique_name)
     filepath = get_experiment_logs_path(self.experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(filepath, '+w'):
         pass
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(filepath) is True
     delete_experiment_logs(self.experiment.unique_name)
     assert os.path.exists(filepath) is False
 def test_experiment_logs_path_creation_deletion(self):
     """Check that an experiment's log file can be created and then deleted."""
     create_experiment_logs_path(self.experiment.unique_name)
     experiment_logs_path = get_experiment_logs_path(
         self.experiment.unique_name)
     filepath = get_experiment_logs_path(self.experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(filepath, '+w'):
         pass
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(filepath) is True
     delete_experiment_logs(self.experiment.unique_name)
     assert os.path.exists(filepath) is False
예제 #5
0
def handle_events_job_logs(experiment_name,
                           experiment_uuid,
                           job_uuid,
                           log_line,
                           task_type=None,
                           task_idx=None):
    """Append one job log line to the experiment's log file.

    Returns early when no ``Experiment`` with ``experiment_uuid`` exists.
    When both ``task_type`` and ``task_idx`` are truthy, the line is prefixed
    with ``"<task_type>.<task_idx + 1> -- "``. The line is written through a
    temporary ``logging.FileHandler`` on the logger named ``experiment_name``;
    the handler is closed and the logger's handler list is cleared before
    returning.

    ``OSError`` raised while opening or writing the log file is swallowed.
    """
    # Must persist resources if logs according to the config
    if not Experiment.objects.filter(uuid=experiment_uuid).exists():
        return
    _logger.debug('handling log event for %s %s', experiment_uuid, job_uuid)
    # NOTE(review): an integer ``task_idx`` of 0 is falsy, so lines for that
    # task are written without the prefix -- confirm whether this is intended.
    if task_type and task_idx:
        log_line = '{}.{} -- {}'.format(task_type, int(task_idx) + 1, log_line)
    xp_logger = logging.getLogger(experiment_name)
    log_path = get_experiment_logs_path(experiment_name)
    try:
        log_handler = logging.FileHandler(log_path)
    except OSError:
        # TODO: retry instead?
        return
    try:
        log_formatter = logging.Formatter(
            '%(asctime)s %(levelname)s %(message)s')
        log_handler.setFormatter(log_formatter)
        xp_logger.addHandler(log_handler)
        xp_logger.setLevel(logging.INFO)
        xp_logger.info(log_line)
    except OSError:
        # TODO: retry instead?
        pass
    finally:
        # Detach the handler and close its stream; merely dropping the
        # reference would leak one open file descriptor per call.
        xp_logger.handlers = []
        log_handler.close()
    def test_handle_events_job_logs_create_one_handler(self):
        """Each call appends one log line and leaves no handler attached."""
        with patch('scheduler.tasks.experiments.experiments_build.apply_async'
                   ) as _:  # noqa
            experiment = ExperimentFactory()

        call_kwargs = {
            'experiment_name': experiment.unique_name,
            'experiment_uuid': experiment.uuid.hex,
            'job_uuid': uuid.uuid4().hex,
            'log_line': 'First test',
            'task_type': TaskType.MASTER,
            'task_idx': 0,
        }

        def check_logger_state(expected_line_count):
            # The experiment logger must hold no handlers after a call, and
            # the log file must contain the expected number of lines.
            experiment_logger = logging.getLogger(experiment.unique_name)
            assert len(experiment_logger.handlers) == 0  # pylint:disable=len-as-condition
            assert self.file_line_count(log_path) == expected_line_count

        # First call: the log file is created and holds a single line.
        handle_events_job_logs(**call_kwargs)
        log_path = get_experiment_logs_path(experiment.unique_name)
        assert os.path.exists(log_path) is True
        check_logger_state(1)

        # Second call: no handler is reused, and a second line is appended.
        handle_events_job_logs(**call_kwargs)
        check_logger_state(2)
예제 #7
0
def get_config_map(namespace, project_name, experiment_group_name,
                   experiment_name, project_uuid, experiment_group_uuid,
                   experiment_uuid, original_name, cloning_strategy,
                   cluster_def, declarations, log_level):
    """Build the ``V1ConfigMap`` describing an experiment.

    The map is named from ``constants.CONFIG_MAP_NAME`` formatted with
    ``experiment_uuid`` and labelled via ``get_map_labels``. Its data holds
    the JSON-encoded cluster definition, declarations and labels, the log
    level, the API url, and the experiment outputs, logs and data paths.

    ``declarations`` of ``None`` is stored as ``'{}'``.

    Returns:
        A ``client.V1ConfigMap`` instance.
    """
    name = constants.CONFIG_MAP_NAME.format(experiment_uuid=experiment_uuid)
    labels = get_map_labels(project_name, experiment_group_name,
                            experiment_name, project_uuid,
                            experiment_group_uuid, experiment_uuid)
    metadata = client.V1ObjectMeta(name=name,
                                   labels=labels,
                                   namespace=namespace)
    experiment_outputs_path = get_experiment_outputs_path(
        experiment_name=experiment_name,
        original_name=original_name,
        cloning_strategy=cloning_strategy)
    experiment_logs_path = get_experiment_logs_path(experiment_name)
    experiment_data_path = get_project_data_path(project_name)
    # ``json.dumps(None)`` is the truthy string 'null', so a trailing
    # ``or '{}'`` never fires; apply the empty-object fallback explicitly.
    declarations_json = ('{}' if declarations is None
                         else json.dumps(declarations))
    data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_DECLARATIONS_KEY_NAME: declarations_json,
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        API_KEY_NAME: get_settings_api_url(),
        constants.CONFIG_MAP_EXPERIMENT_OUTPUTS_PATH_KEY_NAME:
        experiment_outputs_path,
        constants.CONFIG_MAP_EXPERIMENT_LOGS_PATH_KEY_NAME:
        experiment_logs_path,
        constants.CONFIG_MAP_EXPERIMENT_DATA_PATH_KEY_NAME:
        experiment_data_path,
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=data)
예제 #8
0
파일: pods.py 프로젝트: xuduofeng/polyaxon
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          secret_refs=None,
                          configmap_refs=None,
                          resources=None,
                          ephemeral_token=None):
        """Build the ``V1Container`` that runs this task's job.

        The container environment is assembled from the caller-supplied
        ``env_vars``, the job env vars (outputs/logs paths, persistence,
        output refs, ephemeral token), the cluster/declarations/experiment
        info entries, and the resources env vars. ``secret_refs`` and
        ``configmap_refs`` are turned into the container's ``env_from``.

        Requires ``self.cluster_def`` to be set (asserted).
        """
        assert self.cluster_def is not None

        # Environment variables
        container_env = to_list(env_vars, check_none=True)
        outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        logs_path = get_experiment_logs_path(self.experiment_name, temp=False)
        container_env += get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=outputs_path,
            persistence_data=persistence_data,
            log_level=self.log_level,
            logs_path=logs_path,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            ephemeral_token=ephemeral_token,
        )
        config_entries = (
            (constants.CONFIG_MAP_CLUSTER_KEY_NAME,
             json.dumps(self.cluster_def)),
            (constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
             self.declarations),
            (constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
             json.dumps(self.experiment_labels)),
        )
        container_env += [
            get_env_var(name=key, value=value) for key, value in config_entries
        ]
        container_env += get_resources_env_vars(resources=resources)

        # Environment sourced from configmap and secret references
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)

        container_ports = [
            client.V1ContainerPort(container_port=number)
            for number in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=container_ports,
                                  env=container_env,
                                  env_from=env_from,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
예제 #9
0
def safe_log_experiment_job(experiment_name, log_lines):
    """Write ``log_lines`` to the experiment's log file, retrying once.

    If the first write raises ``OSError`` (which includes
    ``FileNotFoundError``), the experiment logs path is created and the write
    is attempted a second time; an error from that retry propagates.
    """
    target_path = get_experiment_logs_path(experiment_name)
    try:
        _lock_log(target_path, log_lines)
    except OSError:
        # Create the logs path, then retry the write once.
        create_experiment_logs_path(experiment_name=experiment_name)
        _lock_log(target_path, log_lines)
예제 #10
0
def safe_log_experiment_job(experiment_name, log_lines):
    """Write ``log_lines`` to the experiment's log file, retrying once.

    If the first write raises ``OSError`` (which includes
    ``FileNotFoundError``), the experiment logs path is created and the write
    is attempted a second time; an error from that retry propagates.
    """
    target_path = get_experiment_logs_path(experiment_name)
    try:
        _lock_log(target_path, log_lines)
    except OSError:
        # Create the logs path, then retry the write once.
        create_experiment_logs_path(experiment_name=experiment_name)
        _lock_log(target_path, log_lines)
예제 #11
0
 def test_experiment_group_logs_path_creation_deletion(self):
     """Check that deleting a group's logs removes its experiments' logs too."""
     experiment = ExperimentFactory(user=self.project.user,
                                    project=self.project,
                                    experiment_group=self.experiment_group)
     experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(experiment_logs_path, '+w'):
         pass
     experiment_group_logs_path = get_experiment_group_logs_path(
         self.experiment_group.unique_name)
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(experiment_group_logs_path) is True
     delete_experiment_group_logs(self.experiment_group.unique_name)
     assert os.path.exists(experiment_logs_path) is False
     assert os.path.exists(experiment_group_logs_path) is False
 def test_experiment_group_logs_path_creation_deletion(self):
     """Check that deleting a group's logs removes its experiments' logs too."""
     experiment = ExperimentFactory(user=self.project.user,
                                    project=self.project,
                                    experiment_group=self.experiment_group)
     experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
     create_experiment_logs_path(experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(experiment_logs_path, '+w'):
         pass
     experiment_group_logs_path = get_experiment_group_logs_path(
         self.experiment_group.unique_name)
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(experiment_group_logs_path) is True
     delete_experiment_group_logs(self.experiment_group.unique_name)
     assert os.path.exists(experiment_logs_path) is False
     assert os.path.exists(experiment_group_logs_path) is False
예제 #13
0
파일: pods.py 프로젝트: whmnoe4j/polyaxon
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          persistence_outputs=None,
                          persistence_data=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None):
        """Build the ``V1Container`` that runs this task's job.

        The container environment is assembled from the caller-supplied
        ``env_vars``, the job env vars (log level, outputs/data/logs paths,
        output refs), the cluster/declarations/experiment info entries, and
        the resources env vars.

        Requires ``self.cluster_def`` to be set (asserted).
        """
        assert self.cluster_def is not None

        container_env = get_list(env_vars)
        outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        data_paths = get_data_paths(persistence_data)
        logs_path = get_experiment_logs_path(self.experiment_name)
        container_env += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=outputs_path,
            data_paths=data_paths,
            logs_path=logs_path,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments)

        config_entries = (
            (constants.CONFIG_MAP_CLUSTER_KEY_NAME,
             json.dumps(self.cluster_def)),
            (constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
             self.declarations),
            (constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
             json.dumps(self.experiment_labels)),
        )
        container_env += [
            get_env_var(name=key, value=value) for key, value in config_entries
        ]
        container_env += get_resources_env_vars(resources=resources)

        container_ports = [
            client.V1ContainerPort(container_port=number)
            for number in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=container_ports,
                                  env=container_env,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
예제 #14
0
    def setUp(self):
        """Create an experiment, its logs URL, and a log file of fake lines.

        ``self.logs`` keeps the generated sentences; each one is written to
        the experiment's log file followed by a newline.
        """
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        experiment = ExperimentFactory(project=project)
        url_parts = (API_V1, project.user.username, project.name,
                     experiment.sequence)
        self.url = '/{}/{}/{}/experiments/{}/logs'.format(*url_parts)

        log_path = get_experiment_logs_path(experiment.unique_name)
        fake = Faker()
        self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
        with open(log_path, 'w') as file:
            file.writelines(sentence + '\n' for sentence in self.logs)
예제 #15
0
 def test_project_logs_path_creation_deletion(self):
     """Check that deleting a project's logs removes its experiments' logs too."""
     with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
         experiment = ExperimentFactory(user=self.project.user, project=self.project)
     experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
     create_experiment_logs_path(experiment.unique_name)
     # Create the file and release the handle immediately; leaving it open
     # leaks a descriptor and can block deletion on some platforms.
     with open(experiment_logs_path, '+w'):
         pass
     project_logs_path = get_project_logs_path(self.project.unique_name)
     # NOTE(review): this resolves the same logs path as ``project_logs_path``;
     # confirm whether a repos-path lookup was intended here.
     project_repos_path = get_project_logs_path(self.project.unique_name)
     # Should be true, created by the signal
     assert os.path.exists(experiment_logs_path) is True
     assert os.path.exists(project_logs_path) is True
     assert os.path.exists(project_repos_path) is True
     delete_project_logs(self.project.unique_name)
     assert os.path.exists(experiment_logs_path) is False
     assert os.path.exists(project_logs_path) is False
     assert os.path.exists(project_repos_path) is False
예제 #16
0
파일: pods.py 프로젝트: ttsvetanov/polyaxon
    def get_pod_container(self,
                          volume_mounts,
                          env_vars=None,
                          command=None,
                          args=None,
                          resources=None):
        """Build the ``V1Container`` that runs this task's job.

        The container environment is assembled from the caller-supplied
        ``env_vars``, the job env vars (log level and the outputs, logs, data
        and project data paths), the cluster/declarations/experiment info
        entries, and -- only when ``resources`` is truthy -- the resources
        env vars.

        Requires ``self.cluster_def`` to be set (asserted).
        """
        assert self.cluster_def is not None

        container_env = get_list(env_vars)
        outputs_path = get_experiment_outputs_path(
            experiment_name=self.experiment_name,
            original_name=self.original_name,
            cloning_strategy=self.cloning_strategy)
        logs_path = get_experiment_logs_path(self.experiment_name)
        data_path = get_experiment_data_path(self.experiment_name)
        project_data_path = get_project_data_path(
            project_name=self.project_name)
        container_env += get_job_env_vars(
            log_level=self.log_level,
            outputs_path=outputs_path,
            logs_path=logs_path,
            data_path=data_path,
            project_data_path=project_data_path
        )

        config_entries = (
            (constants.CONFIG_MAP_CLUSTER_KEY_NAME,
             json.dumps(self.cluster_def)),
            (constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
             self.declarations),
            (constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
             json.dumps(self.experiment_labels)),
        )
        container_env += [
            get_env_var(name=key, value=value) for key, value in config_entries
        ]

        if resources:
            container_env += get_resources_env_vars(resources=resources)

        container_ports = [
            client.V1ContainerPort(container_port=number)
            for number in self.ports
        ]
        return client.V1Container(name=self.job_container_name,
                                  image=self.job_docker_image,
                                  command=command,
                                  args=args,
                                  ports=container_ports,
                                  env=container_env,
                                  resources=get_resources(resources),
                                  volume_mounts=volume_mounts)
예제 #17
0
    def get(self, request, *args, **kwargs):
        """Stream the experiment's log file back as an attachment.

        Fetches the experiment via ``self.get_experiment()``, records an
        ``EXPERIMENT_LOGS_VIEWED`` audit event for it, and streams its log
        file in 8192-byte chunks.

        Returns:
            A ``StreamingHttpResponse`` carrying the file, or a 404
            ``Response`` when the log file does not exist.
        """
        experiment = self.get_experiment()
        # Audit the experiment that was just fetched instead of relying on
        # ``self.experiment`` having been set as a side effect.
        auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                       instance=experiment,
                       actor_id=request.user.id)
        log_path = get_experiment_logs_path(experiment.unique_name)

        filename = os.path.basename(log_path)
        chunk_size = 8192
        try:
            log_file = open(log_path, 'rb')
        except FileNotFoundError:
            logger.warning('Log file not found: log_path=%s', log_path)
            return Response(status=status.HTTP_404_NOT_FOUND,
                            data='Log file not found: log_path={}'.format(log_path))

        try:
            response = StreamingHttpResponse(
                FileWrapper(log_file, chunk_size),
                content_type=mimetypes.guess_type(log_path)[0])
            # Size the response from the handle we actually stream, so a
            # concurrent rename/removal of the path cannot cause a mismatch.
            response['Content-Length'] = os.fstat(log_file.fileno()).st_size
            response['Content-Disposition'] = "attachment; filename={}".format(filename)
        except Exception:
            # The response never took ownership of the file; do not leak it.
            log_file.close()
            raise
        return response
예제 #18
0
    def setUp(self):
        """Create an experiment, its logs URL, and a log file of fake lines.

        The experiment logs path is created explicitly before writing.
        ``self.logs`` keeps the generated sentences; each one is written to
        the experiment's log file followed by a newline.
        """
        super().setUp()
        project = ProjectFactory(user=self.auth_client.user)
        experiment = ExperimentFactory(project=project)
        url_parts = (API_V1, project.user.username, project.name,
                     experiment.sequence)
        self.url = '/{}/{}/{}/experiments/{}/logs'.format(*url_parts)

        log_path = get_experiment_logs_path(experiment.unique_name)
        create_experiment_logs_path(experiment_name=experiment.unique_name)
        fake = Faker()
        self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
        with open(log_path, 'w') as file:
            file.writelines(sentence + '\n' for sentence in self.logs)
예제 #19
0
def get_config_map(namespace,
                   project_name,
                   experiment_group_name,
                   experiment_name,
                   project_uuid,
                   experiment_group_uuid,
                   experiment_uuid,
                   original_name,
                   cloning_strategy,
                   cluster_def,
                   declarations,
                   log_level):
    """Build the ``V1ConfigMap`` describing an experiment run.

    The map is named from ``constants.CONFIG_MAP_NAME`` formatted with
    ``experiment_uuid`` and labelled via ``get_map_labels``. Its data holds
    the JSON-encoded cluster definition, declarations and labels, the log
    level, the API url, and the run outputs, logs and data paths.

    ``declarations`` of ``None`` is stored as ``'{}'``.

    Returns:
        A ``client.V1ConfigMap`` instance.
    """
    name = constants.CONFIG_MAP_NAME.format(uuid=experiment_uuid)
    labels = get_map_labels(project_name,
                            experiment_group_name,
                            experiment_name,
                            project_uuid,
                            experiment_group_uuid,
                            experiment_uuid)
    metadata = client.V1ObjectMeta(name=name, labels=labels, namespace=namespace)
    experiment_outputs_path = get_experiment_outputs_path(experiment_name=experiment_name,
                                                          original_name=original_name,
                                                          cloning_strategy=cloning_strategy)
    experiment_logs_path = get_experiment_logs_path(experiment_name)
    experiment_data_path = get_project_data_path(project_name)
    # ``json.dumps(None)`` is the truthy string 'null', so a trailing
    # ``or '{}'`` never fires; apply the empty-object fallback explicitly.
    declarations_json = '{}' if declarations is None else json.dumps(declarations)
    data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_DECLARATIONS_KEY_NAME: declarations_json,
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        API_KEY_NAME: get_settings_api_url(),
        constants.CONFIG_MAP_RUN_OUTPUTS_PATH_KEY_NAME: experiment_outputs_path,
        constants.CONFIG_MAP_RUN_LOGS_PATH_KEY_NAME: experiment_logs_path,
        constants.CONFIG_MAP_RUN_DATA_PATH_KEY_NAME: experiment_data_path,
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=data)
예제 #20
0
 def get_log_path(instance):
     """Return the experiment logs path for ``instance.unique_name``."""
     unique_name = instance.unique_name
     return get_experiment_logs_path(unique_name)
 def get_log_path(instance):
     """Return the experiment logs path for ``instance.unique_name``."""
     unique_name = instance.unique_name
     return get_experiment_logs_path(unique_name)