Example #1
def test_find_fastq_read_pairs(self):
    # Test list functionality
    file_list = [
        'P123_456_AAAAAA_L001_R1_001.fastq.gz',
        'P123_456_AAAAAA_L001_R2_001.fastq.gz',
    ]
    expected_output = {'P123_456_AAAAAA_L001_': sorted(file_list)}
    self.assertEqual(expected_output,
                     parsers.find_fastq_read_pairs(file_list))
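
From the expected output above, find_fastq_read_pairs evidently groups FASTQ filenames by the prefix shared by the R1/R2 mates and maps each prefix to the sorted pair. Below is a minimal sketch of that grouping, assuming Illumina-style ..._R1_001.fastq.gz / ..._R2_001.fastq.gz names; it is illustrative only, not the pipeline's actual implementation.

import os
import re
from collections import defaultdict

def find_fastq_read_pairs_sketch(file_list):
    """Group FASTQ files into read pairs keyed by the filename prefix
    shared by the R1/R2 mates (illustrative sketch, not the library code)."""
    # Assumed naming convention: <prefix>R1_<chunk>.fastq[.gz]
    pair_re = re.compile(r'^(?P<prefix>.+_)R[12]_\d+\.fastq(\.gz)?$')
    pairs = defaultdict(list)
    for path in file_list:
        match = pair_re.match(os.path.basename(path))
        if match:
            pairs[match.group('prefix')].append(path)
        else:
            # Files that don't match the pattern are kept as singletons
            pairs[os.path.basename(path)].append(path)
    return {prefix: sorted(files) for prefix, files in pairs.items()}

Run against the file_list from the test, this sketch returns {'P123_456_AAAAAA_L001_': sorted(file_list)}, matching expected_output.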
Example #2
def analyze(project, sample, quiet=False, config=None, config_file_path=None):
    """The main entry point for the qc pipeline."""
    ## TODO implement "quiet" feature
    ## TODO implement mailing on failure
    LOG.info("Launching qc analysis for project/sample {}/{}".format(
        project, sample))

    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    fastq_files_to_process = []
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    for libprep in sample:
        for seqrun in libprep:
            for fastq_file in seqrun:
                path_to_src_fastq = os.path.join(src_fastq_base, libprep.name,
                                                 seqrun.name, fastq_file)
                fastq_files_to_process.append(path_to_src_fastq)
    paired_fastq_files = list(
        find_fastq_read_pairs(fastq_files_to_process).values())
    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)

    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError as e:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
        slurm_jobid_file = os.path.join(
            log_dir_path, "{}-{}.slurmjobid".format(project.project_id,
                                                    sample))
        LOG.info('Writing slurm job id "{}" to file "{}"'.format(
            slurm_job_id, slurm_jobid_file))
        try:
            with open(slurm_jobid_file, 'w') as f:
                f.write("{}\n".format(slurm_job_id))
        except IOError as e:
            LOG.warning('Could not write slurm job id for project/sample '
                        '{}/{} to file "{}" ({})'.format(
                            project, sample, slurm_jobid_file, e))
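
The analyze entry point above relies on only a small surface of the project/sample objects: project.base_path and project.project_id, sample.name, and iteration down a sample -> libprep -> seqrun -> FASTQ-filename hierarchy in which each level exposes a name. The stand-in classes below (hypothetical names and example values, not the pipeline's real models) illustrate that assumed structure.

class _Node:
    """Minimal stand-in for the pipeline's sample/libprep/seqrun objects:
    a name plus iteration over child objects (illustrative only)."""
    def __init__(self, name, children=()):
        self.name = name
        self._children = list(children)

    def __iter__(self):
        return iter(self._children)

    def __str__(self):
        return self.name


class _Project(_Node):
    """Adds the attributes analyze() reads from the project object."""
    def __init__(self, name, project_id, base_path, samples=()):
        _Node.__init__(self, name, samples)
        self.project_id = project_id
        self.base_path = base_path


# Hypothetical wiring mirroring the hierarchy analyze() walks:
# project -> sample -> libprep -> seqrun -> fastq filename
seqrun = _Node("150101_ST-E00100_0001_AAAAAAAAXX",
               ["P123_456_AAAAAA_L001_R1_001.fastq.gz",
                "P123_456_AAAAAA_L001_R2_001.fastq.gz"])
libprep = _Node("A", [seqrun])
sample = _Node("P123_456", [libprep])
project = _Project("J.Doe_15_01", "P123", "/proj/data", [sample])
# analyze(project, sample) would then create
# /proj/data/ANALYSIS/P123/qc_ngi/P123_456 and pick up both FASTQ files.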
Example #3
def analyze(project, sample, config=None, config_file_path=None):
    """The main entry point for the qc pipeline."""
    LOG.info("Processing project/sample {}/{}".format(project, sample))

    # Two paths diverged in a yellow wood
    project_analysis_path = os.path.join(project.base_path,
                                         "ANALYSIS",
                                         project.project_id,
                                         "qc_ngi")
    # and sorry I could not travel both
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    # and be one traveler, long I stood
    log_dir_path = os.path.join(project_analysis_path, "logs")
    # and looked down one as far as I could
    safe_makedir(sample_analysis_path)
    # To where it bent in the undergrowth
    safe_makedir(log_dir_path)
    # I need to go to sleep

    fastq_files_to_process = []
    # I suppose I -should- have quoted the other one
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    # Whose woods these are I think I know
    for libprep in sample:
        # His house is in the village though
        for seqrun in libprep:
            # He will not see me stopping here
            for fastq_file in seqrun:
                # To watch
                path_to_src_fastq = os.path.join(src_fastq_base,
                                                 libprep.name,
                                                 seqrun.name,
                                                 fastq_file)
                # his woods
                fastq_files_to_process.append(path_to_src_fastq)
    # fill up
    paired_fastq_files = find_fastq_read_pairs(fastq_files_to_process).values()
    # with snow
    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files, sample_analysis_path)

    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path) 
    except RuntimeError as e:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample, slurm_job_id))
Example #4
def analyze(project, sample, quiet=False, config=None, config_file_path=None):
    """The main entry point for the qc pipeline."""
    ## TODO implement "quiet" feature
    ## TODO implement mailing on failure
    LOG.info("Launching qc analysis for project/sample {}/{}".format(project, sample))

    project_analysis_path = os.path.join(project.base_path,
                                         "ANALYSIS",
                                         project.project_id,
                                         "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    fastq_files_to_process = []
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    for libprep in sample:
        for seqrun in libprep:
            for fastq_file in seqrun:
                path_to_src_fastq = os.path.join(src_fastq_base,
                                                 libprep.name,
                                                 seqrun.name,
                                                 fastq_file)
                fastq_files_to_process.append(path_to_src_fastq)
    paired_fastq_files = find_fastq_read_pairs(fastq_files_to_process).values()
    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files, sample_analysis_path)

    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError as e:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample, slurm_job_id))
        slurm_jobid_file = os.path.join(log_dir_path,
                                        "{}-{}.slurmjobid".format(project.project_id,
                                                                  sample))
        LOG.info('Writing slurm job id "{}" to file "{}"'.format(slurm_job_id,
                                                                 slurm_jobid_file))
        try:
            with open(slurm_jobid_file, 'w') as f:
                f.write("{}\n".format(slurm_job_id))
        except IOError as e:
            LOG.warning('Could not write slurm job id for project/sample '
                        '{}/{} to file "{}" ({})'.format(
                            project, sample, slurm_jobid_file, e))
Example #5
def analyze(project, sample, config=None, config_file_path=None):
    """The main entry point for the qc pipeline."""
    LOG.info("Processing project/sample {}/{}".format(project, sample))

    # Two paths diverged in a yellow wood
    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    # and sorry I could not travel both
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    # and be one traveler, long I stood
    log_dir_path = os.path.join(project_analysis_path, "logs")
    # and looked down one as far as I could
    safe_makedir(sample_analysis_path)
    # To where it bent in the undergrowth
    safe_makedir(log_dir_path)
    # I need to go to sleep

    fastq_files_to_process = []
    # I suppose I -should- have quoted the other one
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    # Whose woods these are I think I know
    for libprep in sample:
        # His house is in the village though
        for seqrun in libprep:
            # He will not see me stopping here
            for fastq_file in seqrun:
                # To watch
                path_to_src_fastq = os.path.join(src_fastq_base, libprep.name,
                                                 seqrun.name, fastq_file)
                # his woods
                fastq_files_to_process.append(path_to_src_fastq)
    # fill up
    paired_fastq_files = find_fastq_read_pairs(fastq_files_to_process).values()
    # with snow
    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)

    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError as e:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
def test_find_fastq_read_pairs(self):
    # Test list functionality
    file_list = ["P123_456_AAAAAA_L001_R1_001.fastq.gz",
                 "P123_456_AAAAAA_L001_R2_001.fastq.gz"]
    expected_output = {"P123_456_AAAAAA_L001_": sorted(file_list)}
    self.assertEqual(expected_output, find_fastq_read_pairs(file_list))