def test_find_fastq_read_pairs(self):
    """find_fastq_read_pairs groups R1/R2 files under their shared name prefix."""
    # One paired-end lane: the two filenames differ only in the R1/R2 token.
    read_one = 'P123_456_AAAAAA_L001_R1_001.fastq.gz'
    read_two = 'P123_456_AAAAAA_L001_R2_001.fastq.gz'
    fastq_files = [read_one, read_two]
    # The common prefix (everything before the read-number token) keys the pair.
    expected = {'P123_456_AAAAAA_L001_': sorted(fastq_files)}
    self.assertEqual(expected, parsers.find_fastq_read_pairs(fastq_files))
def analyze(project, sample, quiet=False, config=None, config_file_path=None):
    """The main entry point for the qc pipeline."""
    ## TODO implement "quiet" feature
    ## TODO implement mailing on failure
    LOG.info("Launching qc analysis for project/sample {}/{}".format(
        project, sample))

    # Output tree: <base>/ANALYSIS/<project_id>/qc_ngi/{<sample>, logs}
    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    # Collect every fastq path for this sample across all libpreps/seqruns.
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    fastq_files_to_process = [
        os.path.join(src_fastq_base, libprep.name, seqrun.name, fastq_file)
        for libprep in sample
        for seqrun in libprep
        for fastq_file in seqrun
    ]
    paired_fastq_files = list(
        find_fastq_read_pairs(fastq_files_to_process).values())

    # Build the qc command lines, wrap them in an sbatch script, and submit.
    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)
    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
        # Persist the job id so later runs / monitors can find this submission.
        slurm_jobid_file = os.path.join(
            log_dir_path,
            "{}-{}.slurmjobid".format(project.project_id, sample))
        LOG.info('Writing slurm job id "{}" to file "{}"'.format(
            slurm_job_id, slurm_jobid_file))
        try:
            with open(slurm_jobid_file, 'w') as f:
                f.write("{}\n".format(slurm_job_id))
        except IOError as e:
            # Best-effort bookkeeping: the job is already queued, so only warn.
            LOG.warning('Could not write slurm job id for project/sample '
                        '{}/{} to file "{}" ({})'.format(
                            project, sample, slurm_jobid_file, e))
def analyze(project, sample, config=None, config_file_path=None):
    """The main entry point for the qc pipeline.

    Builds the qc analysis directory tree for the given sample, gathers its
    fastq files, groups them into read pairs, generates the qc command lines,
    and submits them to slurm as a single sbatch job.

    :param project: project object with base_path / project_id attributes
    :param sample: sample object, iterable over libpreps -> seqruns -> fastqs
    :param config: optional parsed configuration passed to sbatch creation
    :param config_file_path: optional path to a configuration file (unused here)
    """
    LOG.info("Processing project/sample {}/{}".format(project, sample))

    # Output tree: <base>/ANALYSIS/<project_id>/qc_ngi/{<sample>, logs}
    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    # Collect every fastq path for this sample across all libpreps/seqruns.
    fastq_files_to_process = []
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    for libprep in sample:
        for seqrun in libprep:
            for fastq_file in seqrun:
                path_to_src_fastq = os.path.join(src_fastq_base,
                                                 libprep.name,
                                                 seqrun.name,
                                                 fastq_file)
                fastq_files_to_process.append(path_to_src_fastq)
    # Wrap in list(): on Python 3 .values() is a one-shot view, and downstream
    # code may index or re-iterate the pairs (matches the other analyze variant).
    paired_fastq_files = list(
        find_fastq_read_pairs(fastq_files_to_process).values())

    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)
    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
def analyze(project, sample, quiet=False, config=None, config_file_path=None):
    """The main entry point for the qc pipeline.

    Builds the qc analysis directory tree for the given sample, gathers its
    fastq files, groups them into read pairs, generates the qc command lines,
    submits them to slurm, and records the resulting slurm job id on disk.

    :param project: project object with base_path / project_id attributes
    :param sample: sample object, iterable over libpreps -> seqruns -> fastqs
    :param quiet: not yet implemented
    :param config: optional parsed configuration passed to sbatch creation
    :param config_file_path: optional path to a configuration file (unused here)
    """
    ## TODO implement "quiet" feature
    ## TODO implement mailing on failure
    LOG.info("Launching qc analysis for project/sample {}/{}".format(project, sample))

    # Output tree: <base>/ANALYSIS/<project_id>/qc_ngi/{<sample>, logs}
    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    # Collect every fastq path for this sample across all libpreps/seqruns.
    fastq_files_to_process = []
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    for libprep in sample:
        for seqrun in libprep:
            for fastq_file in seqrun:
                path_to_src_fastq = os.path.join(src_fastq_base,
                                                 libprep.name,
                                                 seqrun.name,
                                                 fastq_file)
                fastq_files_to_process.append(path_to_src_fastq)
    # Wrap in list(): on Python 3 .values() is a one-shot view, and downstream
    # code may index or re-iterate the pairs (matches the other analyze variant).
    paired_fastq_files = list(
        find_fastq_read_pairs(fastq_files_to_process).values())

    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)
    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
        # Persist the job id so later runs / monitors can find this submission.
        slurm_jobid_file = os.path.join(
            log_dir_path,
            "{}-{}.slurmjobid".format(project.project_id, sample))
        LOG.info('Writing slurm job id "{}" to file "{}"'.format(
            slurm_job_id, slurm_jobid_file))
        try:
            with open(slurm_jobid_file, 'w') as f:
                f.write("{}\n".format(slurm_job_id))
        except IOError as e:
            # BUG FIX: the original called LOG.warn(...'{}/{}...{}...{}'.format(e))
            # with four placeholders but a single argument, which raises
            # IndexError inside the exception handler. Supply all four values
            # and use warning() (warn() is deprecated in the logging module).
            LOG.warning('Could not write slurm job id for project/sample '
                        '{}/{} to file "{}" ({})'.format(
                            project, sample, slurm_jobid_file, e))
def analyze(project, sample, config=None, config_file_path=None):
    """The main entry point for the qc pipeline.

    Sets up the qc analysis directories for the sample, collects its fastq
    files, pairs them by read, builds the qc command lines, and submits them
    to slurm via an sbatch file.

    :param project: project object with base_path / project_id attributes
    :param sample: sample object, iterable over libpreps -> seqruns -> fastqs
    :param config: optional parsed configuration passed to sbatch creation
    :param config_file_path: optional path to a configuration file (unused here)
    """
    LOG.info("Processing project/sample {}/{}".format(project, sample))

    # Directory layout: <base>/ANALYSIS/<project_id>/qc_ngi/{<sample>, logs}
    project_analysis_path = os.path.join(project.base_path, "ANALYSIS",
                                         project.project_id, "qc_ngi")
    sample_analysis_path = os.path.join(project_analysis_path, sample.name)
    log_dir_path = os.path.join(project_analysis_path, "logs")
    safe_makedir(sample_analysis_path)
    safe_makedir(log_dir_path)

    # Gather every fastq path for this sample across all libpreps/seqruns.
    fastq_files_to_process = []
    src_fastq_base = os.path.join(project.base_path, "DATA",
                                  project.project_id, sample.name)
    for libprep in sample:
        for seqrun in libprep:
            for fastq_file in seqrun:
                path_to_src_fastq = os.path.join(src_fastq_base,
                                                 libprep.name,
                                                 seqrun.name,
                                                 fastq_file)
                fastq_files_to_process.append(path_to_src_fastq)
    # Wrap in list(): on Python 3 .values() is a one-shot view, and downstream
    # code may index or re-iterate the pairs (matches the other analyze variant).
    paired_fastq_files = list(
        find_fastq_read_pairs(fastq_files_to_process).values())

    qc_cl_list = return_cls_for_workflow("qc", paired_fastq_files,
                                         sample_analysis_path)
    sbatch_file_path = create_sbatch_file(qc_cl_list, project, sample, config)
    try:
        slurm_job_id = queue_sbatch_file(sbatch_file_path)
    except RuntimeError:
        LOG.error('Failed to queue qc sbatch file for project/sample '
                  '"{}"/"{}"!'.format(project, sample))
    else:
        LOG.info('Queued qc sbatch file for project/sample '
                 '"{}"/"{}": slurm job id {}'.format(project, sample,
                                                     slurm_job_id))
def test_find_fastq_read_pairs(self):
    """Paired R1/R2 fastq files are grouped under their common name prefix."""
    fastq_files = [
        "P123_456_AAAAAA_L001_R1_001.fastq.gz",
        "P123_456_AAAAAA_L001_R2_001.fastq.gz",
    ]
    # Everything before the read-number token is the grouping key.
    expected = {"P123_456_AAAAAA_L001_": sorted(fastq_files)}
    self.assertEqual(expected, find_fastq_read_pairs(fastq_files))