コード例 #1
0
def find_on_path(binary_name, config=None):
    """Report whether ``binary_name`` can be run from the current PATH.

    Any modules that ``get_all_modules_for_workflow`` returns for the binary
    are loaded first. The binary is then probed by running
    ``<binary_name> --version`` with stdout and stderr discarded.

    :param str binary_name: The name of the binary (e.g. "bowtie2")
    :param dict config: The parsed pipeline/system config (optional)

    :returns: True if the probe command ran and exited with status 0;
              False if it could not be started or exited non-zero
    :rtype: boolean
    """
    config = config or {}
    LOG.info('Path to {} not specified in config file; '
             'checking if it is on PATH'.format(binary_name))
    required_modules = get_all_modules_for_workflow(binary_name, config)
    if required_modules:
        LOG.debug("Loading modules {}".format(", ".join(required_modules)))
        load_modules(required_modules)
    try:
        with open(os.devnull, 'w') as null_sink:
            probe_cmd = shlex.split("{} --version".format(binary_name))
            subprocess.check_call(probe_cmd, stdout=null_sink, stderr=null_sink)
    except (OSError, subprocess.CalledProcessError):
        # OSError: the executable could not be started;
        # CalledProcessError: it ran but returned a non-zero exit status.
        return False
    return True
コード例 #2
0
ファイル: workflows.py プロジェクト: johanherman/ngi_pipeline
def find_on_path(binary_name, config=None):
    """Check whether ``binary_name`` is runnable from the current PATH.

    Modules returned by ``get_all_modules_for_workflow`` for this binary are
    loaded before the check. The check itself runs
    ``<binary_name> --version`` and throws away its output.

    :param str binary_name: The name of the binary (e.g. "bowtie2")
    :param dict config: The parsed pipeline/system config (optional)

    :returns: True if the probe command ran and exited with status 0;
              False if it could not be started or exited non-zero
    :rtype: boolean
    """
    config = config or {}
    LOG.info('Path to {} not specified in config file; '
             'checking if it is on PATH'.format(binary_name))
    needed_modules = get_all_modules_for_workflow(binary_name, config)
    if needed_modules:
        load_modules(needed_modules)
    try:
        with open(os.devnull, 'w') as discard:
            version_cmd = shlex.split("{} --version".format(binary_name))
            subprocess.check_call(version_cmd, stdout=discard, stderr=discard)
    except (OSError, subprocess.CalledProcessError):
        # Either the executable could not be started or it exited non-zero.
        return False
    return True
コード例 #3
0
def analyze_seqrun(project, sample, libprep, seqrun, config=None, config_file_path=None):
    """Launch the seqrun-level workflow subtasks for one sequencing run.

    For every subtask returned by ``get_subtasks_for_level(level="seqrun")``
    for which ``is_seqrun_analysis_running_local`` returns a false value, the
    log file is rotated, the setup XML and Piper command line are built, the
    job is launched and its PID is passed to ``record_process_seqrun``.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param NGILibraryPrep libprep: the library prep to analyze
    :param NGISeqrun seqrun: the sequencing run to analyze
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional);
                                 not used by this function
    """
    load_modules(["java/sun_jdk1.7.0_25", "R/2.15.0"])
    for subtask in get_subtasks_for_level(level="seqrun"):
        already_running = is_seqrun_analysis_running_local(workflow_subtask=subtask,
                                                           project_id=project.project_id,
                                                           sample_id=sample.name,
                                                           libprep_id=libprep.name,
                                                           seqrun_id=seqrun.name)
        if already_running:
            continue
        try:
            # Keyword arguments shared by the log-file and exit-code path builders.
            path_args = dict(workflow_subtask=subtask,
                             project_base_path=project.base_path,
                             project_name=project.name,
                             sample_id=sample.name,
                             libprep_id=libprep.name,
                             seqrun_id=seqrun.name)
            ## Temporarily logging to a file until we get ELK set up
            log_file_path = create_log_file_path(**path_args)
            rotate_log(log_file_path)
            # Store the exit code of detached processes
            exit_code_path = create_exit_code_file_path(**path_args)
            build_setup_xml(project, config, sample, libprep.name, seqrun.name)
            command_line = build_piper_cl(project, subtask, exit_code_path, config)
            p_handle = launch_piper_job(command_line, project, log_file_path)
            try:
                record_process_seqrun(project=project, sample=sample, libprep=libprep,
                                      seqrun=seqrun, workflow_subtask=subtask,
                                      analysis_module_name="piper_ngi",
                                      analysis_dir=project.analysis_dir,
                                      pid=p_handle.pid)
            except CharonError:
                ## FIXME: the job has already been launched at this point but
                ## is not recorded, so a later relaunch may start a second
                ## process for the same seqrun.
                LOG.error("<Could not record ...>")
        except (NotImplementedError, RuntimeError) as err:
            LOG.error('Processing project "{}" / sample "{}" / libprep "{}" / '
                      'seqrun "{}" failed: {}'.format(project, sample, libprep, seqrun,
                                                    repr(err)))
コード例 #4
0
def analyze_sample(project, sample, config=None, config_file_path=None):
    """Launch sample-level workflow subtasks once a sample has enough coverage.

    The sample's ``total_autosomal_coverage`` is read from Charon. When it is
    above 28.4, every subtask from ``get_subtasks_for_level(level="sample")``
    for which ``is_sample_analysis_running_local`` returns a false value is
    launched through Piper and its PID is passed to ``record_process_sample``.
    Otherwise (including when Charon holds no coverage value) a "not yet
    ready" message is logged and nothing is launched.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional);
                                 not used by this function
    """
    load_modules(["java/sun_jdk1.7.0_25", "R/2.15.0"])
    charon_session = CharonSession()
    # Determine if we can begin sample-level processing yet.
    # The condition is that the total autosomal coverage is above 28.4X.
    # If these conditions become more complex we can create a function for this
    coverage = charon_session.sample_get(project.project_id,
                                         sample.name).get('total_autosomal_coverage')
    # A record without a coverage value counts as "not ready". The explicit
    # None check avoids comparing None with a float, which raises TypeError on
    # Python 3 (Python 2 silently evaluated that comparison as False).
    if coverage is None or not coverage > 28.4:
        LOG.info('Sample "{}" in project "{}" is not yet ready for '
                 'processing.'.format(sample, project))
        return
    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
    for workflow_subtask in get_subtasks_for_level(level="sample"):
        if is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                            project_id=project.project_id,
                                            sample_id=sample.name):
            continue
        try:
            ## Temporarily logging to a file until we get ELK set up
            log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                 project_base_path=project.base_path,
                                                 project_name=project.name,
                                                 sample_id=sample.name)
            rotate_log(log_file_path)
            # Store the exit code of detached processes
            exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                        project_base_path=project.base_path,
                                                        project_name=project.name,
                                                        sample_id=sample.name)

            build_setup_xml(project, config, sample)
            command_line = build_piper_cl(project, workflow_subtask, exit_code_path, config)
            p_handle = launch_piper_job(command_line, project, log_file_path)
            try:
                record_process_sample(project=project, sample=sample,
                                      workflow_subtask=workflow_subtask,
                                      analysis_module_name="piper_ngi",
                                      analysis_dir=project.analysis_dir,
                                      pid=p_handle.pid)
            except RuntimeError as e:
                # The job is already running; only the bookkeeping failed.
                LOG.error(e)
        except (NotImplementedError, RuntimeError) as e:
            error_msg = ('Processing project "{}" / sample "{}" failed: '
                         '{}'.format(project, sample, repr(e)))
            LOG.error(error_msg)
コード例 #5
0
ファイル: launchers.py プロジェクト: Hammarn/ngi_pipeline
def analyze(project, sample, exec_mode="sbatch", restart_finished_jobs=False,
            restart_running_jobs=False, config=None, config_file_path=None):
    """Launch the sample-level Piper workflows for one sample.

    For every subtask from ``workflows.get_subtasks_for_level(level="sample")``
    for which ``is_sample_analysis_running_local`` returns a false value, the
    setup XML and Piper command lines are built, submitted with
    ``sbatch_piper_sample`` and recorded with ``record_process_sample``.
    Failures inside a subtask are logged and do not stop the other subtasks.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param str exec_mode: "sbatch" or "local" (case-insensitive); "local" is
                          accepted but is currently not implemented, so each
                          subtask fails with a logged NotImplementedError
    :param bool restart_finished_jobs: passed on to
        ``check_for_preexisting_sample_runs`` and ``sbatch_piper_sample``
    :param bool restart_running_jobs: passed on to
        ``check_for_preexisting_sample_runs``
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional);
                                 not used by this function

    :raises RuntimeError: If ``check_for_preexisting_sample_runs`` raises
    :raises ValueError: If exec_mode is an unsupported value
    """
    try:
        check_for_preexisting_sample_runs(project, sample, restart_running_jobs, restart_finished_jobs)
    except RuntimeError as e:
        # may want to process anyway.
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))
    normalized_mode = exec_mode.lower()
    if normalized_mode not in ("sbatch", "local"):
        # The two literals are joined by implicit concatenation. The original
        # wrapped each in its own parentheses, which *called* the first string
        # and raised TypeError instead of this ValueError.
        raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                         'value was "{}"'.format(exec_mode))
    # Use the normalized value from here on so that e.g. "SBATCH" is handled
    # the same way it was validated.
    exec_mode = normalized_mode
    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(sample, project))
    for workflow_subtask in workflows.get_subtasks_for_level(level="sample"):
        if is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                            project_id=project.project_id,
                                            sample_id=sample.name):
            continue
        try:
            log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                 project_base_path=project.base_path,
                                                 project_name=project.dirname,
                                                 project_id=project.project_id,
                                                 sample_id=sample.name)
            rotate_file(log_file_path)
            exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                        project_base_path=project.base_path,
                                                        project_name=project.dirname,
                                                        project_id=project.project_id,
                                                        sample_id=sample.name)
            setup_xml_cl, setup_xml_path = build_setup_xml(project=project,
                                                           sample=sample,
                                                           local_scratch_mode=(exec_mode == "sbatch"),
                                                           config=config)
            piper_cl = build_piper_cl(project=project,
                                      workflow_name=workflow_subtask,
                                      setup_xml_path=setup_xml_path,
                                      exit_code_path=exit_code_path,
                                      config=config,
                                      exec_mode=exec_mode)
            remove_previous_sample_analyses(project)

            if exec_mode == "sbatch":
                process_id = None
                slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                   workflow_subtask,
                                                   project, sample,
                                                   restart_finished_jobs=restart_finished_jobs)
                # Time delay to let sbatch get its act together (takes a few
                # seconds to be visible with sacct): poll up to 10 times,
                # sleeping 2 s after each failed lookup.
                for _ in range(10):
                    try:
                        get_slurm_job_status(slurm_job_id)
                        break
                    except ValueError:
                        time.sleep(2)
                else:
                    # Every lookup failed; the job is still recorded below.
                    LOG.error('sbatch file for sample {}/{} did not '
                              'queue properly! Job ID {} cannot be '
                              'found.'.format(project, sample, slurm_job_id))
            else:
                ## FIXME Now this is broken again
                # Local execution is not available; the error is caught and
                # logged by the handler at the bottom of this loop body.
                raise NotImplementedError("Sorry dude it's a no-go")
            try:
                record_process_sample(project=project,
                                      sample=sample,
                                      analysis_module_name="piper_ngi",
                                      slurm_job_id=slurm_job_id,
                                      process_id=process_id,
                                      workflow_subtask=workflow_subtask)
            except RuntimeError:
                LOG.error('Could not record process for project/sample '
                          '{}/{}, workflow {}'.format(project, sample,
                                                      workflow_subtask))
                ## Question: should we just kill the run in this case or let it go?
        except (NotImplementedError, RuntimeError, ValueError) as e:
            error_msg = ('Processing project "{}" / sample "{}" failed: '
                         '{}'.format(project, sample, repr(e)))
            LOG.error(error_msg)
コード例 #6
0
 def test_load_modules(self):
     """Load the R and Java modules, then check ``R --version`` reports 3.1.0."""
     load_modules(['R/3.1.0', 'java/sun_jdk1.7.0_25'])
     version_output = subprocess.check_output(shlex.split("R --version"))
     # The third whitespace-separated token of the output is compared
     # against the requested R version.
     reported_version = version_output.split()[2]
     assert reported_version == "3.1.0"
コード例 #7
0
def analyze(analysis_object,
            level='sample',
            config=None,
            config_file_path=None):
    """Launch Piper workflow subtasks for every sample in a project.

    Iterates over ``analysis_object.project``. For each sample that
    ``handle_sample_status`` approves, and for each subtask returned by
    ``workflows.get_subtasks_for_level(level=level)``, this builds the setup
    XML and Piper command line, submits them with ``sbatch_piper_sample`` and
    records the job with ``record_process_sample``. Errors raised while
    launching a single subtask are logged and the loop moves on.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param str level: workflow level; "sample" and "genotype" are recognised,
                      any other value logs a warning and falls back to the
                      "alignment_status" field
    :param dict config: not referenced in this function
                        (``analysis_object.config`` is read instead)
    :param str config_file_path: not referenced in this function

    :raises RuntimeError: If ``check_for_preexisting_sample_runs`` raises
                          RuntimeError (re-raised with an "Aborting" prefix)
    :raises ValueError: If exec_mode is an unsupported value
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            # NOTE(review): the sample object itself is passed here, whereas
            # the genotype lookup further down passes sample.name -- confirm
            # that sample_get accepts both.
            charon_reported_status = charon_session.sample_get(
                analysis_object.project.project_id,
                sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze = handle_sample_status(analysis_object, sample,
                                              charon_reported_status)
            if not do_analyze:
                continue
        except CharonError as e:
            # Charon could not be queried for this sample: log and skip it.
            LOG.error(e)
            continue
        # Map the workflow level to the Charon status field handed to
        # check_for_preexisting_sample_runs.
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            LOG.warn('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status"  # Or should we abort?
        try:
            check_for_preexisting_sample_runs(
                analysis_object.project, sample,
                analysis_object.restart_running_jobs,
                analysis_object.restart_finished_jobs, status_field)
        except RuntimeError as e:
            # This aborts the whole call, not just the current sample.
            raise RuntimeError(
                'Aborting processing of project/sample "{}/{}": '
                '{}'.format(analysis_object.project, sample, e))
        # NOTE(review): validation is case-insensitive, but the comparisons
        # against "local" / "sbatch" below are case-sensitive, so e.g.
        # "SBATCH" passes here and then takes the not-implemented branch.
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError(
                '"exec_mode" param must be one of "sbatch" or "local" '
                'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            # Modules are only loaded here for local execution; the list
            # comes from the "piper" -> "load_modules" config entry.
            modules_to_load = analysis_object.config.get("piper", {}).get(
                "load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None  # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(
                        projectid=analysis_object.project.project_id,
                        sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error(
                        'Couldn\'t determine genotyping status for project/'
                        'sample "{}/{}"; skipping analysis.'.format(
                            analysis_object.project, sample))
                    continue
                # Skip genotyping that was already completed unless a restart
                # of finished jobs was requested.
                if find_previous_genotype_analyses(
                        analysis_object.project,
                        sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info(
                            'Project/sample "{}/{}" has completed genotype '
                            'analysis previously; skipping (use flag to force '
                            'analysis)'.format(analysis_object.project,
                                               sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample,
                                       analysis_object.project))
                try:
                    log_file_path = create_log_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    # Remove earlier results unless the caller asked to keep
                    # them. The default_files_to_copy = None assignments in
                    # this section are always overwritten by the
                    # collect_files_for_sample_analysis call that follows.
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(
                                analysis_object.project, sample)
                            default_files_to_copy = None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(
                                analysis_object.project)
                            default_files_to_copy = None

                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        # Genotype runs always pass restart_finished_jobs=True
                        # here, regardless of the analysis_object setting.
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  restart_finished_jobs=True,
                                                                  status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  analysis_object.restart_finished_jobs,
                                                                  status_field="alignment_status")
                    # The setup XML is built from the filtered project, while
                    # the Piper command line below uses the original project.
                    setup_xml_cl, setup_xml_path = build_setup_xml(
                        project=updated_project,
                        sample=sample,
                        workflow=workflow_subtask,
                        local_scratch_mode=(
                            analysis_object.exec_mode == "sbatch"),
                        config=analysis_object.config)
                    piper_cl = build_piper_cl(
                        project=analysis_object.project,
                        workflow_name=workflow_subtask,
                        setup_xml_path=setup_xml_path,
                        exit_code_path=exit_code_path,
                        config=analysis_object.config,
                        exec_mode=analysis_object.exec_mode,
                        generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample(
                            [setup_xml_cl, piper_cl],
                            workflow_subtask,
                            analysis_object.project,
                            sample,
                            restart_finished_jobs=analysis_object.
                            restart_finished_jobs,
                            files_to_copy=default_files_to_copy)
                        # Poll up to 10 times, sleeping 2 s after each failed
                        # lookup; the else branch runs only if no lookup
                        # succeeded.
                        for x in xrange(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            # Only logged; the job is still recorded below.
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project,
                                                      sample, slurm_job_id))
                    else:  # "local"
                        # Caught by the except clause at the end of this try
                        # block and logged as a failure for this subtask.
                        raise NotImplementedError(
                            'Local execution not currently implemented. '
                            'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        record_process_sample(
                            project=analysis_object.project,
                            sample=sample,
                            analysis_module_name="piper_ngi",
                            slurm_job_id=slurm_job_id,
                            process_id=process_id,
                            workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    # A failure for one subtask is logged; the loop then
                    # carries on with the next subtask.
                    error_msg = (
                        'Processing project "{}" / sample "{}" / workflow "{}" '
                        'failed: {}'.format(analysis_object.project, sample,
                                            workflow_subtask, e))
                    LOG.error(error_msg)
コード例 #8
0
 def test_load_modules(self):
     """Load the R and Java modules, then check ``R --version`` reports 3.1.0."""
     load_modules(['R/3.1.0', 'java/sun_jdk1.7.0_25'])
     raw_output = subprocess.check_output(shlex.split("R --version"))
     # The third whitespace-separated token of the output is compared
     # against the requested R version.
     detected_version = raw_output.split()[2]
     assert detected_version == "3.1.0"
コード例 #9
0
def analyze(project, sample,
            exec_mode="sbatch", 
            restart_finished_jobs=False,
            restart_running_jobs=False,
            keep_existing_data=False,
            level="sample",
            genotype_file=None,
            config=None, config_file_path=None,
            generate_bqsr_bam=False):
    """Analyze data at the sample level.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyzed
    :param str exec_mode: "sbatch" or "local" (local not implemented)
    :param bool restart_finished_jobs: Restart jobs that are already done (have a .done file)
    :param bool restart_running_jobs: Kill and restart currently-running jobs
    :param str level: The level on which to perform the analysis ("sample" or "genotype")
    :param str genotype_file: The path to the genotype file (only relevant for genotype analysis)
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)

    :raises ValueError: If exec_mode is an unsupported value
    """
    if level == "sample":
        status_field = "alignment_status"
    elif level == "genotype":
        status_field = "genotype_status"
    else:
        LOG.warn('Unknown workflow level: "{}"'.format(level))
        status_field = "alignment_status" # Or should we abort?
    try:
        check_for_preexisting_sample_runs(project, sample, restart_running_jobs,
                                          restart_finished_jobs, status_field)
    except RuntimeError as e:
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))
    if exec_mode.lower() not in ("sbatch", "local"):
        raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                         'value was "{}"'.format(exec_mode))
    if exec_mode == "local":
        modules_to_load = config.get("piper", {}).get("load_modules", [])
        load_modules(modules_to_load)
    for workflow_subtask in workflows.get_subtasks_for_level(level=level):
        if level == "genotype":
            genotype_status = None # Some records in Charon lack this field, I'm guessing
            try:
                charon_session = CharonSession()
                genotype_status = charon_session.sample_get(projectid=project.project_id,
                                                            sampleid=sample.name).get("genotype_status")
            except CharonError as e:
                LOG.error('Couldn\'t determine genotyping status for project/'
                          'sample "{}/{}"; skipping analysis.'.format(project, sample))
                continue
            if find_previous_genotype_analyses(project, sample) or genotype_status == "DONE":
                if not restart_finished_jobs:
                    LOG.info('Project/sample "{}/{}" has completed genotype '
                             'analysis previously; skipping (use flag to force '
                             'analysis)'.format(project, sample))
                    continue
        if restart_running_jobs:
            # Kill currently-running jobs if they exist
            kill_running_sample_analysis(workflow_subtask=workflow_subtask,
                                         project_id=project.project_id,
                                         sample_id=sample.name)
        # This checks the local jobs database
        if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                project_id=project.project_id,
                                                sample_id=sample.name):
            LOG.info('Launching "{}" analysis for sample "{}" in project '
                     '"{}"'.format(workflow_subtask, sample, project))
            try:
                log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                     project_base_path=project.base_path,
                                                     project_name=project.dirname,
                                                     project_id=project.project_id,
                                                     sample_id=sample.name)
                rotate_file(log_file_path)
                exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                            project_base_path=project.base_path,
                                                            project_name=project.dirname,
                                                            project_id=project.project_id,
                                                            sample_id=sample.name)
                if level == "sample":
                    if not keep_existing_data:
                        remove_previous_sample_analyses(project, sample)
                        default_files_to_copy=None
                elif level == "genotype":
                    if not keep_existing_data:
                        remove_previous_genotype_analyses(project)
                        default_files_to_copy=None

                # Update the project to keep only valid fastq files for setup.xml creation
                if level == "genotype":
                    updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(project,
                                                              sample,
                                                              restart_finished_jobs=True,
                                                              status_field="genotype_status")
                else:
                    updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(project,
                                                              sample,
                                                              restart_finished_jobs,
                                                              status_field="alignment_status")
                setup_xml_cl, setup_xml_path = build_setup_xml(project=updated_project,
                                                               sample=sample,
                                                               workflow=workflow_subtask,
                                                               local_scratch_mode=(exec_mode == "sbatch"),
                                                               config=config)
                piper_cl = build_piper_cl(project=project,
                                          workflow_name=workflow_subtask,
                                          setup_xml_path=setup_xml_path,
                                          exit_code_path=exit_code_path,
                                          config=config,
                                          exec_mode=exec_mode,
                                          generate_bqsr_bam=generate_bqsr_bam)
                if exec_mode == "sbatch":
                    process_id = None
                    slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                       workflow_subtask,
                                                       project, sample,
                                                       restart_finished_jobs=restart_finished_jobs,
                                                       files_to_copy=default_files_to_copy)
                    for x in xrange(10):
                        # Time delay to let sbatch get its act together
                        # (takes a few seconds to be visible with sacct)
                        try:
                            get_slurm_job_status(slurm_job_id)
                            break
                        except ValueError:
                            time.sleep(2)
                    else:
                        LOG.error('sbatch file for sample {}/{} did not '
                                  'queue properly! Job ID {} cannot be '
                                  'found.'.format(project, sample, slurm_job_id))
                else: # "local"
                    raise NotImplementedError('Local execution not currently implemented. '
                                              'I\'m sure Denis can help you with this.')
                    #slurm_job_id = None
                    #launch_piper_job(setup_xml_cl, project)
                    #process_handle = launch_piper_job(piper_cl, project)
                    #process_id = process_handle.pid
                try:
                    record_process_sample(project=project,
                                          sample=sample,
                                          analysis_module_name="piper_ngi",
                                          slurm_job_id=slurm_job_id,
                                          process_id=process_id,
                                          workflow_subtask=workflow_subtask)
                except RuntimeError as e:
                    LOG.error(e)
                    ## Question: should we just kill the run in this case or let it go?
                    continue
            except (NotImplementedError, RuntimeError, ValueError) as e:
                error_msg = ('Processing project "{}" / sample "{}" / workflow "{}" '
                             'failed: {}'.format(project, sample,
                                                 workflow_subtask,
                                                 e))
                LOG.error(error_msg)
コード例 #10
0
 def test_load_modules(self, mock_split):
     """Check that load_modules leaves TEST=test in the process environment.

     ``mock_split`` is a mock injected by a patch decorator that is not
     visible here (presumably replacing the splitting of the module
     command's output -- confirm against the decorator). Its return value
     is set to a two-token list whose second token assigns
     ``os.environ["TEST"]``.
     """
     mock_split.return_value = ['echo', 'os.environ["TEST"] = "test";']
     load_modules(['Any/module'])
     # The environment variable must have been set as a side effect.
     self.assertEqual(os.environ.get('TEST'), 'test')
コード例 #11
0
ファイル: launchers.py プロジェクト: johanherman/ngi_pipeline
def analyze(project,
            sample,
            exec_mode="sbatch",
            restart_finished_jobs=False,
            restart_running_jobs=False,
            config=None,
            config_file_path=None):
    """Analyze data at the sample level.

    For every "sample"-level workflow subtask that is not already running
    locally, this rotates the subtask's log file, builds the setup-XML and
    Piper command lines, removes previous sample analyses, submits the
    commands through sbatch and records the resulting job. Failures for one
    subtask are logged and do not stop the remaining subtasks.

    :param NGIProject project: the project to analyze
    :param NGISample sample: the sample to analyze
    :param str exec_mode: "sbatch" or "local" (case-insensitive); only
                          "sbatch" is currently implemented
    :param bool restart_finished_jobs: forwarded to the pre-existing-run
                                       check and to the sbatch submission
    :param bool restart_running_jobs: forwarded to the pre-existing-run check
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file
                                 (optional); not referenced directly in this
                                 function body

    :raises ValueError: If exec_mode is an unsupported value
    :raises RuntimeError: If the check for pre-existing sample runs fails
    """
    # Validate the arguments first so that a bad exec_mode is reported
    # before any other work has been done.
    normalized_exec_mode = exec_mode.lower()
    if normalized_exec_mode not in ("sbatch", "local"):
        raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                         'value was "{}"'.format(exec_mode))
    # Use the normalized value from here on so the comparisons below agree
    # with the case-insensitive validation above.
    exec_mode = normalized_exec_mode

    try:
        check_for_preexisting_sample_runs(project, sample,
                                          restart_running_jobs,
                                          restart_finished_jobs)
    except RuntimeError as e:
        # may want to process anyway.
        raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                           '{}'.format(project, sample, e))

    modules_to_load = ["java/sun_jdk1.7.0_25", "R/2.15.0"]
    load_modules(modules_to_load)
    LOG.info('Sample "{}" in project "{}" is ready for processing.'.format(
        sample, project))

    for workflow_subtask in workflows.get_subtasks_for_level(level="sample"):
        if is_sample_analysis_running_local(
                workflow_subtask=workflow_subtask,
                project_id=project.project_id,
                sample_id=sample.name):
            # This subtask is already running for the sample; leave it alone.
            continue
        try:
            log_file_path = create_log_file_path(
                workflow_subtask=workflow_subtask,
                project_base_path=project.base_path,
                project_name=project.dirname,
                project_id=project.project_id,
                sample_id=sample.name)
            rotate_file(log_file_path)
            exit_code_path = create_exit_code_file_path(
                workflow_subtask=workflow_subtask,
                project_base_path=project.base_path,
                project_name=project.dirname,
                project_id=project.project_id,
                sample_id=sample.name)
            setup_xml_cl, setup_xml_path = build_setup_xml(
                project=project,
                sample=sample,
                local_scratch_mode=(exec_mode == "sbatch"),
                config=config)
            piper_cl = build_piper_cl(project=project,
                                      workflow_name=workflow_subtask,
                                      setup_xml_path=setup_xml_path,
                                      exit_code_path=exit_code_path,
                                      config=config,
                                      exec_mode=exec_mode)
            remove_previous_sample_analyses(project)

            if exec_mode == "sbatch":
                process_id = None
                slurm_job_id = sbatch_piper_sample(
                    [setup_xml_cl, piper_cl],
                    workflow_subtask,
                    project,
                    sample,
                    restart_finished_jobs=restart_finished_jobs)
                # Time delay to let sbatch get its act together
                # (takes a few seconds to be visible with sacct)
                for _ in xrange(10):
                    try:
                        get_slurm_job_status(slurm_job_id)
                        break
                    except ValueError:
                        time.sleep(2)
                else:
                    # Loop finished without a successful status lookup.
                    LOG.error('sbatch file for sample {}/{} did not '
                              'queue properly! Job ID {} cannot be '
                              'found.'.format(project, sample,
                                              slurm_job_id))
            else:
                ## FIXME Local execution is broken; it is reported through
                ## the NotImplementedError handler below.
                raise NotImplementedError("Sorry dude it's a no-go")
            try:
                record_process_sample(project=project,
                                      sample=sample,
                                      analysis_module_name="piper_ngi",
                                      slurm_job_id=slurm_job_id,
                                      process_id=process_id,
                                      workflow_subtask=workflow_subtask)
            except RuntimeError:
                LOG.error('Could not record process for project/sample '
                          '{}/{}, workflow {}'.format(
                              project, sample, workflow_subtask))
                ## Question: should we just kill the run in this case or let it go?
                continue
        except (NotImplementedError, RuntimeError, ValueError) as e:
            error_msg = ('Processing project "{}" / sample "{}" failed: '
                         '{}'.format(project, sample, repr(e)))
            LOG.error(error_msg)