def test_fastq_path(self):
     """
     The first fastq path returned by find_fastqs must contain both the
     library directory and the library name.
     """
     found = find_fastqs(libdir='tests/libraries', library='NA12877-N-03098121-TD1-TT1')
     # First file of the first element of the result.
     first_fastq = found[0][0]
     self.assertIn(self.libdir, first_fastq)
     self.assertIn(self.library, first_fastq)
Exemple #2
0
def data_available_for_clinseq_barcode(libdir, clinseq_barcode):
    """
    Check that data is available for the specified clinseq barcode in the specified library folder.

    The barcode is validated first. Data is considered available when the
    sub-directory of ``libdir`` named after the barcode exists and
    find_fastqs() does not return ``(None, None)`` for it.

    :param libdir: Directory name where fastqs are organised.
    :param clinseq_barcode: A valid clinseq barcode string
    :return: True if data is available, False otherwise
    :raises ValueError: If clinseq_barcode_is_valid() rejects the barcode.
    """

    if not clinseq_barcode_is_valid(clinseq_barcode):
        # format() instead of "+" so that a non-string barcode is still
        # reported as a ValueError rather than a TypeError from concatenation.
        raise ValueError("Invalid clinseq barcode: {}".format(clinseq_barcode))

    filedir = os.path.join(libdir, clinseq_barcode)
    if not os.path.exists(filedir):
        # logging.warning() replaces the deprecated logging.warn() alias.
        logging.warning(
            "Dir {} does not exists for {}. Not using library.".format(
                filedir, clinseq_barcode))
        return False
    if find_fastqs(clinseq_barcode, libdir) == (None, None):
        logging.warning("No fastq files found for {} in dir {}".format(
            clinseq_barcode, filedir))
        return False

    logging.debug("Library {} has data. Using it.".format(clinseq_barcode))
    return True
 def test_find_fq_gz(self):
     """
     Files named *_1.fq.gz / *_2.fq.gz must be picked up by find_fastqs.
     """
     found = find_fastqs(library=self.library, libdir=self.libdir)
     # Collect the basenames of the first element, then of the second.
     basenames = [
         os.path.basename(path)
         for group in (found[0], found[1])
         for path in group
     ]
     for expected in ('bar_1.fq.gz', 'bar_2.fq.gz'):
         self.assertIn(expected, basenames)
Exemple #4
0
    def configure_umi_processing(self):
        """
        Configure the UMI SNV calling pipeline for every unique capture.

        For each unique capture, the fastq files of its clinseq barcodes are
        passed through fq_trimming(). The following steps are then configured
        in order: fastq to bam, alignment with UMI (jobname '1'), consensus
        reads calling, a second alignment with UMI (jobname '2'), consensus
        read filtering, clipping of overlaps and markdups. The filtered
        consensus bam is finally registered with set_capture_bam().
        """
        capture_to_barcodes = self.get_unique_capture_to_clinseq_barcodes()
        for unique_capture, clinseq_barcodes in capture_to_barcodes.items():
            capture_kit = unique_capture.capture_kit_id
            for clinseq_barcode in clinseq_barcodes:
                # Look the fastq files up once and reuse the result for both
                # read directions instead of calling find_fastqs() twice.
                fastqs = find_fastqs(clinseq_barcode, self.libdir)
                trimmed_fqfiles = fq_trimming(
                    self,
                    fq1_files=fastqs[0],
                    fq2_files=fastqs[1],
                    clinseq_barcode=clinseq_barcode,
                    ref=self.refdata['bwaIndex'],
                    outdir="{}/bams/{}".format(self.outdir, capture_kit),
                    maxcores=self.maxcores)

            # NOTE(review): the steps below run once per unique capture, after
            # the barcode loop, so they use the trimmed fastqs and barcode of
            # the LAST barcode only (and rely on at least one barcode being
            # present). Presumably each capture has exactly one barcode here
            # -- confirm before changing.
            bam_file = self.configure_fastq_to_bam(
                fq_files=trimmed_fqfiles,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)
            realigned_bam = self.configure_alignment_with_umi(
                bamfile=bam_file,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit,
                jobname='1')
            consensus_reads = self.configure_consensus_reads_calling(
                bam=realigned_bam,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)
            realigned_bam2 = self.configure_alignment_with_umi(
                bamfile=consensus_reads,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit,
                jobname='2')
            filtered_bam = self.configure_consensus_read_filter(
                bam=realigned_bam2,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)
            # The return values of the next two steps are not used in this
            # method; the calls themselves are kept unchanged.
            self.configure_clip_overlapping(
                bam=filtered_bam,
                clinseq_barcode=clinseq_barcode,
                capture_kit=capture_kit)
            self.configure_markdups(
                bamfile=realigned_bam, unique_capture=unique_capture)

            self.set_capture_bam(unique_capture, filtered_bam, self.umi)
 def test_find_RN_DDD(self):
     """
     Files named *R1_nnn.fastq.gz / *R2_nnn.fastq.gz must be picked up by
     find_fastqs.
     """
     found = find_fastqs(library=self.library, libdir=self.libdir)
     # Collect the basenames of the first element, then of the second.
     basenames = [
         os.path.basename(path)
         for group in (found[0], found[1])
         for path in group
     ]
     expected_names = (
         'baz_R1_001.fastq.gz',
         'baz_R2_001.fastq.gz',
         'baz_R1_999.fastq.gz',
         'baz_R2_999.fastq.gz',
     )
     for expected in expected_names:
         self.assertIn(expected, basenames)
 def test_find_fastqs_for_no_library(self):
     """
     find_fastqs must return (None, None) when it is called with library=None.
     """
     expected = (None, None)
     result = find_fastqs(library=None, libdir=self.libdir)
     self.assertEqual(result, expected)