def test_fastq_path(self):
    """
    the first fastq path returned by find_fastqs should contain both the
    library directory and the library name
    """
    found = find_fastqs(libdir='tests/libraries',
                        library='NA12877-N-03098121-TD1-TT1')
    # first file of the first group in the result
    first_path = found[0][0]
    self.assertIn(self.libdir, first_path)
    self.assertIn(self.library, first_path)
def data_available_for_clinseq_barcode(libdir, clinseq_barcode):
    """
    Check that data is available for the specified clinseq barcode in the
    specified library folder.

    Data counts as available when the directory ``libdir/clinseq_barcode``
    exists and find_fastqs() returns something other than ``(None, None)``
    for the barcode.

    :param libdir: Directory name where fastqs are organised.
    :param clinseq_barcode: A valid clinseq barcode string
    :return: True if data is available, False otherwise
    :raises ValueError: If clinseq_barcode_is_valid() rejects the barcode.
    """
    if not clinseq_barcode_is_valid(clinseq_barcode):
        # format() instead of "+" so that a non-string barcode still ends up
        # as the intended ValueError rather than a TypeError from concatenation.
        raise ValueError(
            "Invalid clinseq barcode: {}".format(clinseq_barcode))

    filedir = os.path.join(libdir, clinseq_barcode)
    if not os.path.exists(filedir):
        # logging.warning: the logging.warn alias is deprecated.
        logging.warning(
            "Dir {} does not exists for {}. Not using library.".format(
                filedir, clinseq_barcode))
        return False

    if find_fastqs(clinseq_barcode, libdir) == (None, None):
        logging.warning("No fastq files found for {} in dir {}".format(
            clinseq_barcode, filedir))
        return False

    logging.debug("Library {} has data. Using it.".format(clinseq_barcode))
    return True
def test_find_fq_gz(self):
    """
    files named *_1.fq.gz / *_2.fq.gz should be picked up by find_fastqs
    """
    found = find_fastqs(libdir=self.libdir, library=self.library)
    # flatten both returned groups into a single list of bare file names
    basenames = [os.path.basename(path)
                 for group in (found[0], found[1])
                 for path in group]
    for expected in ('bar_1.fq.gz', 'bar_2.fq.gz'):
        self.assertIn(expected, basenames)
def configure_umi_processing(self):
    """
    Configure the UMI SNV calling pipeline.

    For every clinseq barcode of every unique capture (as returned by
    get_unique_capture_to_clinseq_barcodes()), the following steps are
    configured in order: fastq trimming, fastq-to-bam conversion, a first
    UMI alignment, consensus read calling, a second UMI alignment,
    consensus read filtering, overlap clipping and duplicate marking.
    The filtered bam is then registered via set_capture_bam().
    """
    capture_to_barcodes = self.get_unique_capture_to_clinseq_barcodes()

    for unique_capture, clinseq_barcodes in capture_to_barcodes.items():
        capture_kit = unique_capture.capture_kit_id

        for clinseq_barcode in clinseq_barcodes:
            # Look the fastq files up once per barcode and reuse the result
            # for both read groups instead of repeating the same call.
            fastqs = find_fastqs(clinseq_barcode, self.libdir)

            trimmed_fqfiles = fq_trimming(
                self,
                fq1_files=fastqs[0],
                fq2_files=fastqs[1],
                clinseq_barcode=clinseq_barcode,
                ref=self.refdata['bwaIndex'],
                outdir="{}/bams/{}".format(self.outdir, capture_kit),
                maxcores=self.maxcores)

            bam_file = self.configure_fastq_to_bam(
                fq_files=trimmed_fqfiles,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)

            realigned_bam = self.configure_alignment_with_umi(
                bamfile=bam_file,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit,
                jobname='1')

            consensus_reads = self.configure_consensus_reads_calling(
                bam=realigned_bam,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)

            realigned_bam2 = self.configure_alignment_with_umi(
                bamfile=consensus_reads,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit,
                jobname='2')

            filtered_bam = self.configure_consensus_read_filter(
                bam=realigned_bam2,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)

            # The results of the next two steps are not used further in this
            # method; the calls are kept because they configure the steps.
            # NOTE(review): the filtered bam (not the clipped or
            # duplicate-marked one) is what gets registered below - confirm
            # that this is intended.
            self.configure_clip_overlapping(
                bam=filtered_bam,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)

            # Duplicate marking runs on the first realigned bam.
            self.configure_markdups(
                bamfile=realigned_bam,
                unique_capture=unique_capture)

            self.set_capture_bam(unique_capture, filtered_bam, self.umi)
def test_find_RN_DDD(self):
    """
    files named *R1_nnn.fastq.gz / *R2_nnn.fastq.gz should be picked up by
    find_fastqs
    """
    found = find_fastqs(libdir=self.libdir, library=self.library)
    # flatten both returned groups into a single list of bare file names
    basenames = [os.path.basename(path)
                 for group in (found[0], found[1])
                 for path in group]
    expected_names = (
        'baz_R1_001.fastq.gz',
        'baz_R2_001.fastq.gz',
        'baz_R1_999.fastq.gz',
        'baz_R2_999.fastq.gz',
    )
    for expected in expected_names:
        self.assertIn(expected, basenames)
def test_find_fastqs_for_no_library(self):
    """
    find_fastqs called with library=None should return (None, None)
    """
    nothing_found = (None, None)
    result = find_fastqs(libdir=self.libdir, library=None)
    self.assertEqual(result, nothing_found)