Beispiel #1
0
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SampleAnalysis row in the local tracking database is examined:

    * exit code 0: the sample is recorded as "ANALYZED" in Charon, the
      status "DONE" is propagated via recurse_status_for_sample, the local
      row is deleted and coverage is updated from the Piper QC directory.
    * exit code > 0: the sample is recorded as "FAILED" and the local row
      is deleted.
    * anything else (typically no exit code): the job is considered failed
      if SLURM reports any status for it or, for local processes, if the
      PID no longer exists; otherwise it is treated as still running and
      Charon is set to "UNDER_ANALYSIS" if it says something different.

    CharonError and OSError raised while handling one sample are logged
    (and mailed unless quiet) and do not stop the loop. The database
    session is committed once, after all rows have been processed.

    :param dict config: The parsed configuration file (optional). Only the
                        'quiet' key is read here; when true, no
                        notification e-mails are sent. A missing config is
                        treated as "not quiet".
    :param str config_file_path: The path to the configuration file
                                 (optional); not used in this function body.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                _mail_unless_quiet(config, project_name, sample_id, engine,
                                   "ERROR", error_text)
                continue
            try:
                # NOTE: compare against 0 directly. A truthiness guard such as
                # "piper_exit_code and ..." rejects 0 and would send every
                # successful job down the "no exit code" branch below.
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    _mail_unless_quiet(config, project_name, sample_id, engine,
                                       "INFO", info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status = "DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id, "piper_ngi",
                                                "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir)
                elif piper_exit_code is not None and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                 '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    _mail_unless_quiet(config, project_name, sample_id, engine,
                                       "ERROR", error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    job_failed = False
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            # The status lookup failed for this job id;
                            # treat it the same as a non-zero exit.
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            job_failed = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            job_failed = True
                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(label, set_status))
                        LOG.error(error_text)
                        _mail_unless_quiet(config, project_name, sample_id, engine,
                                           "ERROR", error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['analysis_status']
                        if not charon_status == "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                     'local process tracking database indicates it is running. '
                                     'Setting value in Charon to {}.'.format(label, charon_status,
                                                                             set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                _mail_unless_quiet(config, project_name, sample_id, engine,
                                   "ERROR", error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                _mail_unless_quiet(config, project_name, sample_id, engine,
                                   "ERROR", error_text)
        session.commit()


def _mail_unless_quiet(config, project_name, sample_id, engine, level, info_text):
    """Send an analysis notification e-mail unless the config sets 'quiet'.

    A missing (None) or empty config is treated as "not quiet", so the
    mail is sent.
    """
    if config and config.get('quiet'):
        return
    mail_analysis(project_name=project_name, sample_name=sample_id,
                  engine_name=engine, level=level, info_text=info_text)
Beispiel #2
0
def analyze(project, sample, exec_mode="sbatch", restart_finished_jobs=False,
            restart_running_jobs=False, config=None, config_file_path=None):
    """Analyze data at the sample level.

    For every sample-level workflow subtask that is not already running
    locally, the Piper command lines are built, the job is submitted and
    the resulting job is recorded in the local tracking database.
    Failures while processing one subtask are logged and do not abort the
    remaining subtasks.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyzed
    :param str exec_mode: "sbatch" or "local" (case-insensitive)
    :param bool restart_finished_jobs: passed on to the pre-existing-run check
                                       and to the sbatch submission
    :param bool restart_running_jobs: passed on to the pre-existing-run check
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)

    :raises RuntimeError: If the check for pre-existing sample runs fails
    :raises ValueError: If exec_mode is an unsupported value
    """
    try:
        check_for_preexisting_sample_runs(project, sample, restart_running_jobs, restart_finished_jobs)
    except RuntimeError as e:
        # may want to process anyway.
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))
    if exec_mode.lower() not in ("sbatch", "local"):
        # Adjacent string literals are concatenated; wrapping each one in its
        # own parentheses would instead try to *call* the first string.
        raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                         'value was "{}"'.format(exec_mode))
    # Validation above is case-insensitive, so dispatch on the same
    # normalised value below.
    exec_mode = exec_mode.lower()
    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
    for workflow_subtask in workflows.get_subtasks_for_level(level="sample"):
        if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                project_id=project.project_id,
                                                sample_id=sample.name):
            try:
                log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                     project_base_path=project.base_path,
                                                     project_name=project.dirname,
                                                     project_id=project.project_id,
                                                     sample_id=sample.name)
                rotate_file(log_file_path)
                exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                            project_base_path=project.base_path,
                                                            project_name=project.dirname,
                                                            project_id=project.project_id,
                                                            sample_id=sample.name)
                setup_xml_cl, setup_xml_path = build_setup_xml(project=project,
                                                               sample=sample,
                                                               local_scratch_mode=(exec_mode == "sbatch"),
                                                               config=config)
                piper_cl = build_piper_cl(project=project,
                                          workflow_name=workflow_subtask,
                                          setup_xml_path=setup_xml_path,
                                          exit_code_path=exit_code_path,
                                          config=config,
                                          exec_mode=exec_mode)
                remove_previous_sample_analyses(project)

                if exec_mode == "sbatch":
                    process_id = None
                    slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                       workflow_subtask,
                                                       project, sample,
                                                       restart_finished_jobs=restart_finished_jobs)
                    # Time delay to let sbatch get its act together (takes a
                    # few seconds to be visible with sacct)
                    for _ in range(10):
                        try:
                            get_slurm_job_status(slurm_job_id)
                            break
                        except ValueError:
                            time.sleep(2)
                    else:
                        # Loop finished without "break": the job never showed up.
                        LOG.error('sbatch file for sample {}/{} did not '
                                  'queue properly! Job ID {} cannot be '
                                  'found.'.format(project, sample, slurm_job_id))
                else:
                    ## FIXME Now this is broken again
                    # Local execution is disabled; the error is caught and
                    # logged by the handler at the bottom of this try block.
                    raise NotImplementedError("Sorry dude it's a no-go")
                try:
                    record_process_sample(project=project,
                                          sample=sample,
                                          analysis_module_name="piper_ngi",
                                          slurm_job_id=slurm_job_id,
                                          process_id=process_id,
                                          workflow_subtask=workflow_subtask)
                except RuntimeError:
                    LOG.error('Could not record process for project/sample '
                              '{}/{}, workflow {}'.format(project, sample,
                                                          workflow_subtask))
                    ## Question: should we just kill the run in this case or let it go?
                    continue
            except (NotImplementedError, RuntimeError, ValueError) as e:
                error_msg = ('Processing project "{}" / sample "{}" failed: '
                             '{}'.format(project, sample, repr(e)))
                LOG.error(error_msg)
Beispiel #3
0
def analyze(project,
            sample,
            exec_mode="sbatch",
            restart_finished_jobs=False,
            restart_running_jobs=False,
            config=None,
            config_file_path=None):
    """Analyze data at the sample level.

    Each sample-level workflow subtask that is not already running locally
    gets its Piper command lines built, is submitted, and is then recorded
    in the local tracking database. A failure in one subtask is logged and
    the loop moves on to the next subtask.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyzed
    :param str exec_mode: "sbatch" or "local" (case-insensitive)
    :param bool restart_finished_jobs: forwarded to the pre-existing-run
                                       check and to the sbatch submission
    :param bool restart_running_jobs: forwarded to the pre-existing-run check
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)

    :raises RuntimeError: If the check for pre-existing sample runs fails
    :raises ValueError: If exec_mode is an unsupported value
    """
    try:
        check_for_preexisting_sample_runs(project, sample,
                                          restart_running_jobs,
                                          restart_finished_jobs)
    except RuntimeError as e:
        # may want to process anyway.
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))
    if exec_mode.lower() not in ("sbatch", "local"):
        # The two literals must be adjacent so they concatenate; writing
        # ('...')('...') calls the first string and raises TypeError.
        raise ValueError(
            '"exec_mode" param must be one of "sbatch" or "local" '
            'value was "{}"'.format(exec_mode))
    # Dispatch on the same case-normalised value that was validated.
    exec_mode = exec_mode.lower()
    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(
        sample, project))
    for workflow_subtask in workflows.get_subtasks_for_level(level="sample"):
        if not is_sample_analysis_running_local(
                workflow_subtask=workflow_subtask,
                project_id=project.project_id,
                sample_id=sample.name):
            try:
                log_file_path = create_log_file_path(
                    workflow_subtask=workflow_subtask,
                    project_base_path=project.base_path,
                    project_name=project.dirname,
                    project_id=project.project_id,
                    sample_id=sample.name)
                rotate_file(log_file_path)
                exit_code_path = create_exit_code_file_path(
                    workflow_subtask=workflow_subtask,
                    project_base_path=project.base_path,
                    project_name=project.dirname,
                    project_id=project.project_id,
                    sample_id=sample.name)
                setup_xml_cl, setup_xml_path = build_setup_xml(
                    project=project,
                    sample=sample,
                    local_scratch_mode=(exec_mode == "sbatch"),
                    config=config)
                piper_cl = build_piper_cl(project=project,
                                          workflow_name=workflow_subtask,
                                          setup_xml_path=setup_xml_path,
                                          exit_code_path=exit_code_path,
                                          config=config,
                                          exec_mode=exec_mode)
                remove_previous_sample_analyses(project)

                if exec_mode == "sbatch":
                    process_id = None
                    slurm_job_id = sbatch_piper_sample(
                        [setup_xml_cl, piper_cl],
                        workflow_subtask,
                        project,
                        sample,
                        restart_finished_jobs=restart_finished_jobs)
                    # Time delay to let sbatch get its act together (takes a
                    # few seconds to be visible with sacct)
                    for _ in range(10):
                        try:
                            get_slurm_job_status(slurm_job_id)
                            break
                        except ValueError:
                            time.sleep(2)
                    else:
                        # Reached only when no attempt hit "break".
                        LOG.error('sbatch file for sample {}/{} did not '
                                  'queue properly! Job ID {} cannot be '
                                  'found.'.format(project, sample,
                                                  slurm_job_id))
                else:
                    ## FIXME Now this is broken again
                    # Local execution is disabled; the outer handler of this
                    # try block logs the error.
                    raise NotImplementedError("Sorry dude it's a no-go")
                try:
                    record_process_sample(project=project,
                                          sample=sample,
                                          analysis_module_name="piper_ngi",
                                          slurm_job_id=slurm_job_id,
                                          process_id=process_id,
                                          workflow_subtask=workflow_subtask)
                except RuntimeError:
                    LOG.error('Could not record process for project/sample '
                              '{}/{}, workflow {}'.format(
                                  project, sample, workflow_subtask))
                    ## Question: should we just kill the run in this case or let it go?
                    continue
            except (NotImplementedError, RuntimeError, ValueError) as e:
                error_msg = ('Processing project "{}" / sample "{}" failed: '
                             '{}'.format(project, sample, repr(e)))
                LOG.error(error_msg)