Example #1
0
 def test_overlapping_alignments_2(self):
     """Extraction of overlapping reads - with a non-default
     minimal overlap (``_min_overlap`` is set to 5).

     Every case asks for the alignments overlapping an annotation
     entry on "chrom" and compares the read names of the result with
     the expected list.
     """
     self._generate_bam_file(
         self.example_data.sam_content_1, self._sam_bam_prefix)
     self.gene_wise_quantification = GeneWiseQuantification()
     self.gene_wise_quantification._min_overlap = 5
     sam = pysam.Samfile(self._sam_bam_prefix + ".bam")
     # Each case: (entry start, entry end, expected read names)
     cases = [
         # 1 overlapping base in the 5' end of the reads => not enough
         (1, 10, []),
         # 4 overlapping bases in the 5' end of the reads => not enough
         (1, 13, []),
         # 5 overlapping bases in the 5' end of the reads => okay
         (1, 14,
          ["myread:01", "myread:02", "myread:03", "myread:04",
           "myread:05"]),
         # 1 overlapping base in the 3' end of the reads => not enough
         (19, 23, []),
         # 4 overlapping bases in the 3' end of the reads => not enough
         (16, 23, []),
         # 5 overlapping bases in the 3' end of the reads => okay
         (15, 23,
          ["myread:01", "myread:02", "myread:03", "myread:04",
           "myread:05"]),
     ]
     for start, end, expected_ids in cases:
         alignments = (
             self.gene_wise_quantification._overlapping_alignments(
                 sam, Gff3EntryMoc("chrom", start, end)))
         self.assertListEqual(self._mapping_ids(alignments), expected_ids)
Example #2
0
 def _quantify_gene_wise(
         self, lib_name, read_alignment_path,
         norm_by_alignment_freq, norm_by_overlap_freq,
         annotation_files):
     """Run the gene wise quantification for one library.

     One output path is derived per annotation file. If none of these
     files needs to be created, a note is written to stderr and the
     method returns without doing anything else. Otherwise the
     overlaps per alignment are calculated and each annotation file is
     quantified into its own output file.
     """
     output_paths = [
         self._paths.gene_quanti_path(lib_name, annotation_file)
         for annotation_file in annotation_files]
     # The check is evaluated for every output path (a list, not a
     # generator, so there is no short-circuiting).
     creation_needed = [
         self._file_needs_to_be_created(output_path, quiet=True)
         for output_path in output_paths]
     if not any(creation_needed):
         # All output files of this library exist - skip their creation
         skip_note = (
             "The file(s) %s exist(s). Skipping their/its generation.\n" %
             ", " .join(output_paths))
         sys.stderr.write(skip_note)
         return
     quantifier = GeneWiseQuantification(
         min_overlap=self._args.min_overlap,
         read_region=self._args.read_region,
         clip_length=self._args.clip_length,
         norm_by_alignment_freq=norm_by_alignment_freq,
         norm_by_overlap_freq=norm_by_overlap_freq,
         allowed_features_str=self._args.allowed_features,
         skip_antisense=self._args.skip_antisense,
         unique_only=self._args.unique_only)
     quantifier.calc_overlaps_per_alignment(
         read_alignment_path, self._paths.annotation_paths)
     # Annotation file names and annotation paths are walked in lockstep
     file_path_pairs = zip(annotation_files, self._paths.annotation_paths)
     for annotation_file, annotation_path in file_path_pairs:
         output_path = self._paths.gene_quanti_path(
             lib_name, annotation_file)
         quantifier.quantify(
             read_alignment_path, annotation_path, output_path,
             self._args.pseudocounts)
Example #3
0
 def test_overlapping_alignments_1(self):
     """Extraction of overlapping reads with the default settings.

     The read names returned for several annotation entries on "chrom"
     are compared with the expected lists.
     """
     self._generate_bam_file(
         self.example_data.sam_content_1, self._sam_bam_prefix)
     self.gene_wise_quantification = GeneWiseQuantification()
     sam = pysam.Samfile(self._sam_bam_prefix + ".bam")

     def overlapping_ids(start, end):
         # Read names of the alignments overlapping chrom:start-end
         return self._mapping_ids(
             self.gene_wise_quantification._overlapping_alignments(
                 sam, Gff3EntryMoc("chrom", start, end)))

     first_five = [
         "myread:01", "myread:02", "myread:03", "myread:04", "myread:05"]
     last_five = [
         "myread:06", "myread:07", "myread:08", "myread:09", "myread:10"]
     # Overlap with all mappings
     self.assertListEqual(overlapping_ids(1, 100), first_five + last_five)
     # Overlapping with no mapping
     self.assertListEqual(overlapping_ids(1, 5), [])
     # Overlapping by 1 base - in the 5' end of the reads
     self.assertListEqual(overlapping_ids(1, 10), first_five)
     # No overlap - gene very close upstream of the reads
     self.assertListEqual(overlapping_ids(1, 9), [])
     # Overlapping by 1 base - in the 3' end of the reads
     self.assertListEqual(overlapping_ids(19, 23), first_five)
     # No overlap - very close downstream of the reads
     self.assertListEqual(overlapping_ids(20, 23), [])
 def test_overlapping_alignments_1(self):
     """Extraction of overlapping reads for stranded annotation entries.

     The first group of cases runs with the settings of a freshly
     created ``GeneWiseQuantification``; afterwards ``_strand_specific``
     and ``_antisense_only`` are changed and a minus strand entry is
     queried again.
     """
     self._generate_bam_file(self.example_data.sam_content_1,
                             self._sam_bam_prefix)
     self.gene_wise_quantification = GeneWiseQuantification()
     sam = pysam.Samfile(self._sam_bam_prefix + ".bam")

     def overlapping_ids(start, end, strand):
         # Read names of the alignments reported for the given entry
         return self._mapping_ids(
             self.gene_wise_quantification._overlapping_alignments(
                 sam, Gff3EntryMoc("chrom", start, end, strand)))

     first_five = [
         "myread:01", "myread:02", "myread:03", "myread:04", "myread:05"]
     # Each case: (entry start, entry end, entry strand, expected names)
     default_setting_cases = [
         # Overlap with all mappings on the forward strand
         (1, 100, "+", first_five),
         # Overlapping with no mapping
         (1, 5, "+", []),
         # Overlapping by 1 base - in the 5' end of the reads
         (1, 10, "+", first_five),
         # No overlap - gene very close upstream of the reads
         (1, 9, "+", []),
         # Overlapping by 1 base - in the 3' end of the reads
         (19, 23, "+", first_five),
         # Overlapping by 1 base - in the 3' end of the reads but on
         # the wrong strand
         (19, 23, "-", []),
         # No overlap - very close downstream of the reads
         (20, 23, "+", []),
     ]
     for start, end, strand, expected_ids in default_setting_cases:
         self.assertListEqual(
             overlapping_ids(start, end, strand), expected_ids)
     # Overlapping by 1 base - in the 3' end of the reads on the
     # opposite strand, without strand specificity
     self.gene_wise_quantification._strand_specific = False
     self.assertListEqual(overlapping_ids(19, 23, "-"), first_five)
     # Entry on the opposite strand, with strand specificity and only
     # antisense overlaps allowed
     self.gene_wise_quantification._strand_specific = True
     self.gene_wise_quantification._antisense_only = True
     self.assertListEqual(overlapping_ids(19, 100, "-"), first_five)
Example #5
0
 def _quantify_gene_wise(
         self, lib_name, read_alignment_path,
         norm_by_alignment_freq, norm_by_overlap_freq,
         annotation_files):
     """Run the gene wise quantification for one library.

     Nothing is computed when none of the library's output files needs
     to be created; a note is written to stderr instead. The overlaps
     per alignment are only calculated when ``norm_by_overlap_freq``
     is true.
     """
     target_paths = [
         self._paths.gene_quanti_path(lib_name, annotation_file)
         for annotation_file in annotation_files]
     # Every target path is checked (list, hence no short-circuiting)
     pending = [
         self._file_needs_to_be_created(target_path, quiet=True)
         for target_path in target_paths]
     if not any(pending):
         # All output files of this library exist - skip their creation
         joined_paths = ", " .join(target_paths)
         sys.stderr.write(
             "The file(s) %s exist(s). Skipping their/its generation.\n" %
             joined_paths)
         return
     quantifier = GeneWiseQuantification(
         min_overlap=self._args.min_overlap,
         read_region=self._args.read_region,
         clip_length=self._args.clip_length,
         norm_by_alignment_freq=norm_by_alignment_freq,
         norm_by_overlap_freq=norm_by_overlap_freq,
         allowed_features_str=self._args.allowed_features,
         skip_antisense=self._args.skip_antisense,
         unique_only=self._args.unique_only)
     if norm_by_overlap_freq:
         quantifier.calc_overlaps_per_alignment(
             read_alignment_path, self._paths.annotation_paths)
     # Annotation file names and annotation paths are walked in lockstep
     file_path_pairs = zip(annotation_files, self._paths.annotation_paths)
     for annotation_file, annotation_path in file_path_pairs:
         target_path = self._paths.gene_quanti_path(
             lib_name, annotation_file)
         quantifier.quantify(
             read_alignment_path, annotation_path, target_path,
             self._args.pseudocounts)
def data_gene_wise_quanti():
    """Bind the module-level fixtures of the gene wise quantification tests.

    Sets the globals ``gene_wise_quantification`` (a new
    ``GeneWiseQuantification``), ``sam_bam_prefix`` (file name prefix
    "dummy") and ``sam_content`` (SAM text with a header and ten reads).
    """
    # The global declarations have to precede the assignments; declaring
    # a name global after assigning it in the same function is a
    # SyntaxError.
    global gene_wise_quantification
    global sam_bam_prefix
    global sam_content

    gene_wise_quantification = GeneWiseQuantification()
    sam_bam_prefix = "dummy"
    sam_content = """@HD	VN:1.0
@SQ	SN:chrom	LN:1500
@SQ	SN:plasmid1	LN:100
@SQ	SN:plasmid2	LN:200
myread:01	0	chrom	10	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:02	0	chrom	10	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:03	0	chrom	10	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:04	0	chrom	10	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:05	0	chrom	10	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:06	16	chrom	35	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:07	16	chrom	35	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:08	16	chrom	35	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:09	16	chrom	35	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
myread:10	16	chrom	35	255	10M	*	0	0	GTGGACAACC	*	NM:i:1	MD:Z:11T3	NH:i:1	XI:i:1	XA:Z:Q
"""
 def setUp(self):
     """Prepare the attributes the tests rely on.

     Sets the file name prefix of the generated SAM/BAM files and
     creates the example data as well as a quantification object.
     """
     self._sam_bam_prefix = "dummy"
     self.example_data = ExampleData()
     self.gene_wise_quantification = GeneWiseQuantification()
class TestGeneWiseQuantification(unittest.TestCase):
    """Tests for ``GeneWiseQuantification._overlapping_alignments``.

    Each test writes example SAM content to disk, converts it into an
    indexed BAM file and checks which read names are reported for a
    series of annotation entries on "chrom".
    """

    def setUp(self):
        """Create the example data and a quantification object."""
        self.example_data = ExampleData()
        self._sam_bam_prefix = "dummy"
        self.gene_wise_quantification = GeneWiseQuantification()

    def tearDown(self):
        """Remove the SAM/BAM files a test may have generated."""
        for suffix in [".sam", ".bam", ".bam.bai"]:
            try:
                os.remove(self._sam_bam_prefix + suffix)
            except FileNotFoundError:
                # A test that failed before all of its files were
                # generated must not be masked by a clean-up error.
                pass

    def test_overlapping_alignments_1(self):
        """Extraction of overlapping reads with the default settings."""
        self._generate_bam_file(self.example_data.sam_content_1,
                                self._sam_bam_prefix)
        self.gene_wise_quantification = GeneWiseQuantification()
        sam = pysam.Samfile(self._sam_bam_prefix + ".bam")
        # Overlap with all mappings
        self._assert_overlapping_ids(
            sam, 1, 100,
            ["myread:01", "myread:02", "myread:03", "myread:04",
             "myread:05", "myread:06", "myread:07", "myread:08",
             "myread:09", "myread:10"])
        # Overlapping with no mapping
        self._assert_overlapping_ids(sam, 1, 5, [])
        # Overlapping by 1 base - in the 5' end of the reads
        self._assert_overlapping_ids(
            sam, 1, 10,
            ["myread:01", "myread:02", "myread:03", "myread:04", "myread:05"])
        # No overlap - gene very close upstream of the reads
        self._assert_overlapping_ids(sam, 1, 9, [])
        # Overlapping by 1 base - in the 3' end of the reads
        self._assert_overlapping_ids(
            sam, 19, 23,
            ["myread:01", "myread:02", "myread:03", "myread:04", "myread:05"])
        # No overlap - very close downstream of the reads
        self._assert_overlapping_ids(sam, 20, 23, [])

    def test_overlapping_alignments_2(self):
        """Extraction of overlapping reads - with a non-default
        minimal overlap.
        """
        self._generate_bam_file(self.example_data.sam_content_1,
                                self._sam_bam_prefix)
        self.gene_wise_quantification = GeneWiseQuantification()
        self.gene_wise_quantification._min_overlap = 5
        sam = pysam.Samfile(self._sam_bam_prefix + ".bam")
        # 1 overlapping base in the 5' end of the reads => not enough
        self._assert_overlapping_ids(sam, 1, 10, [])
        # 4 overlapping bases in the 5' end of the reads => not enough
        self._assert_overlapping_ids(sam, 1, 13, [])
        # 5 overlapping bases in the 5' end of the reads => okay
        self._assert_overlapping_ids(
            sam, 1, 14,
            ["myread:01", "myread:02", "myread:03", "myread:04", "myread:05"])
        # 1 overlapping base in the 3' end of the reads => not enough
        self._assert_overlapping_ids(sam, 19, 23, [])
        # 4 overlapping bases in the 3' end of the reads => not enough
        self._assert_overlapping_ids(sam, 16, 23, [])
        # 5 overlapping bases in the 3' end of the reads => okay
        self._assert_overlapping_ids(
            sam, 15, 23,
            ["myread:01", "myread:02", "myread:03", "myread:04", "myread:05"])

    def _assert_overlapping_ids(self, sam, start, end, expected_ids):
        """Assert the read names reported for the entry chrom:start-end.

        ``sam`` is the opened alignment file and ``expected_ids`` the
        list of read names the lookup must return, in order.
        """
        self.assertListEqual(
            self._mapping_ids(
                self.gene_wise_quantification._overlapping_alignments(
                    sam, Gff3EntryMoc("chrom", start, end))),
            expected_ids)

    def _mapping_ids(self, mappings):
        """Return the ``qname`` of every mapping as a list."""
        return [mapping.qname for mapping in mappings]

    def _generate_bam_file(self, sam_content, file_prefix):
        """Write ``sam_content`` to <prefix>.sam and build <prefix>.bam
        together with its index.
        """
        sam_file = "{}.sam".format(file_prefix)
        bam_file = "{}.bam".format(file_prefix)
        # The context manager closes the handle even if writing fails
        with open(sam_file, "w") as sam_fh:
            sam_fh.write(sam_content)
        pysam.view("-Sb",
                   "-o{}".format(bam_file),
                   sam_file,
                   catch_stdout=False)
        pysam.index(bam_file)
 def setUp(self):
     """Set up the per-test state.

     Stores the prefix used for the generated SAM/BAM file names and
     instantiates the example data and the quantification object.
     """
     self._sam_bam_prefix = "dummy"
     self.example_data = ExampleData()
     self.gene_wise_quantification = GeneWiseQuantification()
class TestGeneWiseQuantification(unittest.TestCase):
    """Tests for ``GeneWiseQuantification._overlapping_alignments``.

    The example SAM content is written to disk and converted into an
    indexed BAM file; the tests then compare the read names reported
    for annotation entries on "chrom" with the expected ones.
    """

    def setUp(self):
        """Create the example data and a quantification object."""
        self.example_data = ExampleData()
        self._sam_bam_prefix = "dummy"
        self.gene_wise_quantification = GeneWiseQuantification()

    def tearDown(self):
        """Delete the generated SAM/BAM files, skipping missing ones."""
        for suffix in [".sam", ".bam", ".bam.bai"]:
            try:
                os.remove(self._sam_bam_prefix + suffix)
            except FileNotFoundError:
                # If a test broke before generating all files, the
                # clean-up must not hide the original failure.
                pass

    def test_overlapping_alignments_1(self):
        """Extraction of overlapping reads with the default settings."""
        self._generate_bam_file(
            self.example_data.sam_content_1, self._sam_bam_prefix)
        self.gene_wise_quantification = GeneWiseQuantification()
        sam = pysam.Samfile(self._sam_bam_prefix + ".bam")
        # Each case: (entry start, entry end, expected read names)
        cases = [
            # Overlap with all mappings
            (1, 100,
             ["myread:01", "myread:02", "myread:03", "myread:04",
              "myread:05", "myread:06", "myread:07", "myread:08",
              "myread:09", "myread:10"]),
            # Overlapping with no mapping
            (1, 5, []),
            # Overlapping by 1 base - in the 5' end of the reads
            (1, 10,
             ["myread:01", "myread:02", "myread:03", "myread:04",
              "myread:05"]),
            # No overlap - gene very close upstream of the reads
            (1, 9, []),
            # Overlapping by 1 base - in the 3' end of the reads
            (19, 23,
             ["myread:01", "myread:02", "myread:03", "myread:04",
              "myread:05"]),
            # No overlap - very close downstream of the reads
            (20, 23, []),
        ]
        for start, end, expected_ids in cases:
            self._assert_overlapping_ids(sam, start, end, expected_ids)

    def test_overlapping_alignments_2(self):
        """Extraction of overlapping reads - with a non-default
        minimal overlap.
        """
        self._generate_bam_file(
            self.example_data.sam_content_1, self._sam_bam_prefix)
        self.gene_wise_quantification = GeneWiseQuantification()
        self.gene_wise_quantification._min_overlap = 5
        sam = pysam.Samfile(self._sam_bam_prefix + ".bam")
        # Each case: (entry start, entry end, expected read names)
        cases = [
            # 1 overlapping base in the 5' end of the reads => not enough
            (1, 10, []),
            # 4 overlapping bases in the 5' end of the reads => not enough
            (1, 13, []),
            # 5 overlapping bases in the 5' end of the reads => okay
            (1, 14,
             ["myread:01", "myread:02", "myread:03", "myread:04",
              "myread:05"]),
            # 1 overlapping base in the 3' end of the reads => not enough
            (19, 23, []),
            # 4 overlapping bases in the 3' end of the reads => not enough
            (16, 23, []),
            # 5 overlapping bases in the 3' end of the reads => okay
            (15, 23,
             ["myread:01", "myread:02", "myread:03", "myread:04",
              "myread:05"]),
        ]
        for start, end, expected_ids in cases:
            self._assert_overlapping_ids(sam, start, end, expected_ids)

    def _assert_overlapping_ids(self, sam, start, end, expected_ids):
        """Assert the read names reported for the entry chrom:start-end.

        ``sam`` is the opened alignment file and ``expected_ids`` the
        list of read names the lookup must return, in order.
        """
        self.assertListEqual(self._mapping_ids(
            self.gene_wise_quantification._overlapping_alignments(
                sam, Gff3EntryMoc("chrom", start, end))), expected_ids)

    def _mapping_ids(self, mappings):
        """Return the ``qname`` of every mapping as a list."""
        return [mapping.qname for mapping in mappings]

    def _generate_bam_file(self, sam_content, file_prefix):
        """Write ``sam_content`` to <prefix>.sam and build <prefix>.bam
        together with its index.
        """
        sam_file = "%s.sam" % file_prefix
        bam_file = "%s.bam" % file_prefix
        # The context manager closes the handle even if writing fails
        with open(sam_file, "w") as sam_fh:
            sam_fh.write(sam_content)
        pysam.view("-Sb", "-o%s" % bam_file, sam_file)
        pysam.index(bam_file)