Esempio n. 1
0
 def test_correct_and_wrong_cluster_script_in_list(self):
     """Pick the script for job 1234567 out of a list of two scripts.

     The list holds one script name starting with a different number
     (999999) followed by one starting with the requested job id.
     """
     expected_script = "1234567.dyn-dmp.x99xx123.16.sh"
     candidates = ["999999.dyn-dmp.x01xx012.16.sh", expected_script]
     found_script = get_cluster_script_from_list(
         job_id=1234567, file_list=candidates)
     self.assertEqual(found_script, expected_script)
Esempio n. 2
0
def get_job_status_and_job_dir_from_sub_dir(job_id, sub_dir, recent=False):
    """
    Determine the status and the data directory of a job from `sub_dir`.

    The listing of `sub_dir` is inspected and the first matching rule wins:

    1. an entry named ``<job_id>``: finished job,
    2. a cluster script found by `get_cluster_script_from_list`: running
       job, the job dir is read from that script,
    3. an entry named ``<job_id>.pending``: pending job,
    4. an entry found by `get_renamed_job_folder_from_list` that is a
       directory: assumed to be a finished job.

    For a recent job the listing is checked up to 3 times, sleeping one
    second before each re-check. The reason is that a job can switch from
    pending to running in such a way that the pending folder is already gone
    while the cluster script has not been created yet. Only recent jobs are
    re-checked, since this function is called soon after submission; an old
    job without pending folder, cluster script or finished folder has
    probably been deleted or moved. Old jobs are typically only processed
    after a restart of the polling script, when the information about
    previously checked joblogfiles is lost.

    Parameters
    ----------
    job_id : int
        Job id to check the status for
    sub_dir : str
        Path of the directory where the job was submitted
    recent : boolean
        Datetime in joblogfile points to recent submission. If `True`,
        `sub_dir` is checked up to 3 times, otherwise only once.
        Default is False.

    Returns
    -------
    job_status : str
        Job status short string as defined in the Job model.
        `Job.JOB_STATUS_NONE` if no status or no existing job dir could be
        determined.

    job_dir : None or str
        None if no job dir (or job_status) could be determined, otherwise the
        directory path where the job data is currently located.
    """

    function_name = "get_job_status_and_job_dir_from_sub_dir"
    logger = logging.getLogger(__name__).getChild(function_name)
    copy_logger_settings(__name__, "utils.caefileio.clusterscript")
    logger.info("Getting job_status and job_dir from sub_dir: {}".format(
        sub_dir))

    # Nothing is known until one of the checks below succeeds.
    job_status = None
    job_dir = None

    max_attempts = 3 if recent else 1

    for attempt in range(1, max_attempts + 1):
        if attempt == 1:
            logger.debug("Checking sub_dir for job_status and job_dir.")
        else:
            # Give the cluster a moment to create the next stage's files.
            time.sleep(1)
            logger.debug("Re-checking sub_dir for job_status and job_dir.")
        logger.debug(" {}/{} checks.".format(attempt, max_attempts))

        # A listing failure is only logged; the next attempt (if any) may
        # still succeed.
        try:
            entries = os.listdir(sub_dir)
            logger.debug("Content of sub_dir: {}".format(sorted(entries)))
        except NotADirectoryError as err_msg:
            logger.warning(f"sub_dir is not a directory: {err_msg}")
            continue
        except FileNotFoundError as err_msg:
            logger.info(f"sub_dir not found: {err_msg}")
            continue
        except PermissionError as err_msg:
            logger.info(f"No access to sub_dir: {err_msg}")
            continue
        except OSError as err_msg:
            logger.warning(
                f"OSError occurred. Not sure what causes it: {err_msg}")
            continue

        finished_name = str(job_id)
        pending_name = finished_name + ".pending"
        script_name = get_cluster_script_from_list(
            job_id=job_id, file_list=entries)

        # Precedence is finished > running > pending, so leftovers of an
        # earlier stage are ignored once a later stage is detected.
        if finished_name in entries:
            logger.debug("Finished job folder name in sub_dir!")
            job_status = Job.JOB_STATUS_FINISHED
            job_dir = os.path.join(sub_dir, finished_name)
        elif script_name is not None:
            script_path = os.path.join(sub_dir, script_name)
            job_status = Job.JOB_STATUS_RUNNING
            job_dir = get_cluster_scratch_dir_from_script(script_path)
        elif pending_name in entries:
            job_status = Job.JOB_STATUS_PENDING
            job_dir = os.path.join(sub_dir, pending_name)
            logger.debug(
                "Pending job folder name in sub dir: {}".format(job_dir))
        else:
            # Last resort: the job folder might have been renamed. This
            # comes after the pending check because the renamed name may be
            # extended with anything.
            renamed_name = get_renamed_job_folder_from_list(
                job_id=job_id, file_list=entries)
            if renamed_name:
                renamed_path = os.path.join(sub_dir, renamed_name)
                logger.debug("Possibly found renamed job folder: {}".format(
                    renamed_name))
                if os.path.isdir(renamed_path):
                    logger.debug("Renamed job folder is dir: {}".format(
                        renamed_path))
                    logger.debug("Assuming finished job.")
                    job_status = Job.JOB_STATUS_FINISHED
                    job_dir = renamed_path

        if job_status is not None:
            break

    if job_status is not None and attempt > 1:
        separator = "=" * 80
        logger.debug(
            separator + "\nRe-checking sub_dir is worth it!\n" + separator)

    # Only report a result whose job dir really exists as a directory.
    have_candidate = job_status is not None and job_dir is not None
    if have_candidate:
        if os.path.isdir(job_dir):
            logger.info("job_status determined from sub_dir: {}".format(
                job_status))
            logger.info("job_dir determined from sub_dir: {}".format(job_dir))
            return job_status, job_dir
        logger.error("Found job_dir is not a directory: {}".format(job_dir))

    logger.info("No job_status or job_dir could be determined from sub_dir")
    return Job.JOB_STATUS_NONE, None
Esempio n. 3
0
 def test_wrong_cluster_script_in_list(self):
     """Expect None when the only script name starts with another number."""
     candidates = ["999999.dyn-dmp.l01cl012.16.sh"]
     found_script = get_cluster_script_from_list(
         job_id=1234567, file_list=candidates)
     self.assertIsNone(found_script)
Esempio n. 4
0
 def test_another_correct_cluster_script_in_list(self):
     """Expect the single script name starting with the job id back."""
     expected_script = "1234567.dyn-dmp.x99xx012.8.sh"
     found_script = get_cluster_script_from_list(
         job_id=1234567, file_list=[expected_script])
     self.assertEqual(found_script, expected_script)
Esempio n. 5
0
 def test_only_correct_cluster_script_in_list_pam_2(self):
     """Expect a lone ``pam-dmp`` script for the job id to be returned."""
     expected_script = "1234567.pam-dmp.x12xx123.16.sh"
     found_script = get_cluster_script_from_list(
         job_id=1234567, file_list=[expected_script])
     self.assertEqual(found_script, expected_script)
Esempio n. 6
0
 def test_empty_list(self):
     """Expect None when the file list is empty."""
     found_script = get_cluster_script_from_list(
         job_id=7654321, file_list=[])
     self.assertIsNone(found_script)