Example #1
0
    def _complete(self):
        """Record the outcome of the finished job.

        Reads the return code from ``self._pipe`` and computes the elapsed
        time since ``self._start_time``. Logs a structured event carrying the
        size of the job's output directory, appends a ``Result`` through
        ``ResultsAggregator.append`` and writes a summary log line.
        """
        ret = self._pipe.returncode
        exec_time_s = time.time() - self._start_time

        status = "finished"
        # The output directory is keyed by the raw job name.
        output_dir = os.path.join(self._output, JOBS_OUTPUT_DIR,
                                  self._job.name)
        bytes_consumed = get_directory_size_bytes(output_dir)
        event = StructuredLogEvent(
            source=self._job.name,
            category=EVENT_CATEGORY_RESOURCE_UTIL,
            name=EVENT_NAME_BYTES_CONSUMED,
            message="job output directory size",
            bytes_consumed=bytes_consumed,
        )
        log_event(event)
        result = Result(self._job.name, ret, status, exec_time_s)
        ResultsAggregator.append(self._results_filename, result)

        logger.info("Job %s completed return_code=%s exec_time_s=%s",
                    self._job.name, ret, exec_time_s)
Example #2
0
 def cancel(self):
     """Mark this job as canceled and record a CANCELED result.

     Sets the return code to 1, flags the job as complete, appends a
     ``Result`` with zero execution time through ``ResultsAggregator.append``
     and logs the cancellation.
     """
     self._return_code = 1
     self._is_complete = True

     # A canceled job is recorded with an execution time of 0.0.
     canceled = Result(
         self._job.name,
         self._return_code,
         JobCompletionStatus.CANCELED,
         0.0,
     )
     ResultsAggregator.append(
         self._output,
         canceled,
         batch_id=self._batch_id,
     )
     logger.info("Canceled job %s", self._job.name)
Example #3
0
 def cancel(self):
     """Mark this job as canceled; only the manager node records the result.

     Always sets the return code to 1 and flags the job as complete. When
     ``self._is_manager_node`` is false, only a log line is written. Otherwise
     a CANCELED ``Result`` (zero execution time, tagged with the HPC job ID)
     is appended through ``ResultsAggregator.append``.
     """
     self._return_code = 1
     self._is_complete = True

     if not self._is_manager_node:
         # Non-manager nodes do not append a result.
         logger.info("Canceled job %s on non-manager node", self._job.name)
         return

     canceled = Result(
         self._job.name,
         self._return_code,
         JobCompletionStatus.CANCELED,
         0.0,
         hpc_job_id=self._hpc_job_id,
     )
     ResultsAggregator.append(self._output, canceled, batch_id=self._batch_id)
     logger.info("Canceled job %s", self._job.name)
Example #4
0
    def _complete(self):
        """Record the outcome of the finished job on the manager node.

        Stores the process return code from ``self._pipe`` and computes the
        elapsed time since ``self._start_time``. On a non-manager node only a
        log line is written. On the manager node a structured event with the
        output directory size is logged, a FINISHED ``Result`` is appended
        through ``ResultsAggregator.append`` and a summary line is logged.
        """
        self._return_code = self._pipe.returncode
        elapsed_s = time.time() - self._start_time

        if not self._is_manager_node:
            # This will happen on a multi-node job. Don't complete it multiple times.
            logger.info(
                "Job %s completed on non-manager node return_code=%s exec_time_s=%s",
                self._job.name,
                self._return_code,
                elapsed_s,
            )
            return

        # Report how much disk space the job's output directory uses.
        job_output_path = self._output / JOBS_OUTPUT_DIR / self._job.name
        dir_size_bytes = get_directory_size_bytes(job_output_path)
        log_event(
            StructuredLogEvent(
                source=self._job.name,
                category=EVENT_CATEGORY_RESOURCE_UTIL,
                name=EVENT_NAME_BYTES_CONSUMED,
                message="job output directory size",
                bytes_consumed=dir_size_bytes,
            )
        )

        finished = Result(
            self._job.name,
            self._return_code,
            JobCompletionStatus.FINISHED,
            elapsed_s,
            hpc_job_id=self._hpc_job_id,
        )
        ResultsAggregator.append(self._output, finished, batch_id=self._batch_id)

        logger.info(
            "Job %s completed return_code=%s exec_time_s=%s hpc_job_id=%s",
            self._job.name,
            self._return_code,
            elapsed_s,
            self._hpc_job_id,
        )