Esempio n. 1
0
    def test_e2e(self):
        """
        From raw reads and ENSEMBL annotation to rnamaps.

        Chains segmentation, region generation, STAR index building, STAR read
        mapping, ``xlsites`` and ``rnamaps``. Only the two STAR return codes
        are asserted; the remaining steps just have to finish without raising.
        """
        # Segment the annotation, then derive the regions file from it.
        segmentation = get_temp_file_name(extension='gtf')
        regions_dir = get_temp_dir()
        iCount.genomes.segment.get_segments(self.gtf, segmentation, self.fai)
        iCount.genomes.region.make_regions(segmentation, regions_dir)
        regions = os.path.join(regions_dir, iCount.genomes.region.REGIONS_FILE)

        # STAR: build the genome index, expecting a zero return code.
        star_index = get_temp_dir()
        self.assertEqual(
            iCount.externals.star.build_index(self.fasta, star_index, annotation=self.gtf),
            0,
        )

        # STAR: map the reads against that index, again expecting zero.
        mapping_dir = get_temp_dir()
        self.assertEqual(
            iCount.externals.star.map_reads(
                self.reads, star_index, out_dir=mapping_dir, annotation=self.gtf),
            0,
        )

        # The mapped reads are in the first listed file whose name starts
        # with 'Aligned'.
        aligned = [entry for entry in os.listdir(mapping_dir) if entry.startswith('Aligned')]
        bam = os.path.join(mapping_dir, aligned[0])

        # Output targets handed to xlsites.
        sites_single = get_temp_file_name(extension='bed.gz')
        sites_multi = get_temp_file_name(extension='bed.gz')
        skipped = get_temp_file_name(extension='bam')
        iCount.mapping.xlsites.run(bam, sites_single, sites_multi, skipped)

        # RNA-maps are computed from the first xlsites output and the regions.
        iCount.analysis.rnamaps.run(sites_single, regions)
Esempio n. 2
0
    def setUp(self):
        """
        Prepare scratch locations and small input fixtures for each test.

        Sets two temporary file names (``tmp1``, ``tmp2``), two temporary
        directories (``dir``, ``dir2``) and the ``cross_links``, ``peaks``,
        ``annotation``, ``gtf`` and ``bam`` fixtures.
        """
        warnings.simplefilter(action="ignore", category=ResourceWarning)

        # Scratch locations for output:
        self.tmp1 = get_temp_file_name()
        self.tmp2 = get_temp_file_name()
        self.dir = get_temp_dir()
        self.dir2 = get_temp_dir()

        # Three cross-link rows; note they are listed out of positional
        # order (16, 14, 15).
        xlink_rows = [
            ['1', '16', '17', '.', '5', '+'],
            ['1', '14', '15', '.', '5', '+'],
            ['1', '15', '16', '.', '5', '+'],
        ]
        self.cross_links = make_file_from_list(xlink_rows, extension='bed')

        # A single peak row.
        peak_rows = [['1', '15', '16', '.', '15', '+']]
        self.peaks = make_file_from_list(peak_rows)

        # Nine-column annotation rows: differing feature types, biotypes,
        # strands and intervals, plus one exact duplicate (CDS 10-20 '+' "A").
        annotation_rows = [
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'ncRNA', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "B";'],
            ['1', '.', 'CDS', '10', '20', '.', '-', '.', 'biotype "C";'],
            ['1', '.', 'CDS', '12', '18', '.', '+', '.', 'biotype "A";'],
            ['1', '.', 'CDS', '30', '40', '.', '+', '.', 'biotype "D";'],
        ]
        self.annotation = make_file_from_list(annotation_rows)

        # Minimal gene -> transcript -> exon hierarchy on the same interval.
        gene_row = ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_id "A";']
        transcript_row = [
            '1', '.', 'transcript', '10', '20', '.', '+', '.',
            'gene_id "A"; transcript_id "AA";',
        ]
        exon_row = [
            '1', '.', 'exon', '10', '20', '.', '+', '.',
            'gene_id "A"; transcript_id "AA"; exon_number "1";',
        ]
        self.gtf = make_file_from_list([gene_row, transcript_row, exon_row])

        # BAM description: two chromosomes and two segments whose NH tags
        # are 1 and 11. The seed is fixed at 0.
        bam_spec = {
            'chromosomes': [('1', 3000), ('2', 2000)],
            'segments': [
                ('name3:rbc:CCCC:', 0, 0, 100, 20, [(0, 100)], {'NH': 1}),
                ('name4:ABC', 0, 0, 300, 20, [(0, 200)], {'NH': 11}),
            ],
        }
        self.bam = make_bam_file(bam_spec, rnd_seed=0)
Esempio n. 3
0
 def setUp(self):
     """Create an output directory and the expected table headers."""
     warnings.simplefilter(action="ignore", category=ResourceWarning)
     self.out_dir = get_temp_dir()

     # All three headers share the same trailing columns and differ only in
     # the first one. Concatenation yields a separate list per header.
     shared_columns = ['Length', 'cDNA #', 'cDNA %']
     self.type_header = ['Type'] + shared_columns
     self.subtype_header = ['Subtype'] + shared_columns
     self.gene_header = ['Gene name (Gene ID)'] + shared_columns
Esempio n. 4
0
 def setUp(self):
     """
     Create scratch directories plus genome, reads and annotation fixtures.

     The reads are generated from the freshly made genome; the annotation is
     a single gene with one transcript and one exon.
     """
     self.dir = get_temp_dir()
     self.index_dir = get_temp_dir()

     # Two sequences of length 1000, and reads made from that genome.
     self.genome = make_fasta_file(num_sequences=2, seq_len=1000)
     self.reads = make_fastq_file(genome=self.genome)

     # Minimal gene -> transcript -> exon hierarchy on the same interval.
     gene_row = ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_id "A";']
     transcript_row = [
         '1', '.', 'transcript', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA";',
     ]
     exon_row = [
         '1', '.', 'exon', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA"; exon_number "1";',
     ]
     self.annotation = make_file_from_list([gene_row, transcript_row, exon_row])

     # The filter is installed only after the fixtures have been created.
     warnings.simplefilter(action="ignore", category=ResourceWarning)
Esempio n. 5
0
 def setUp(self):
     """Create a scratch directory and a FASTQ fixture with barcodes and adapter."""
     self.dir = get_temp_dir()
     self.adapter = 'CCCCCCCCC'

     # Five barcodes, each a 4-letter core flanked by 'NNN' and 'NN'.
     barcode_set = [
         'NNNGGTTNN',
         'NNNTTGTNN',
         'NNNCAATNN',
         'NNNACCTNN',
         'NNNGGCGNN',
     ]
     self.barcodes = barcode_set

     # Reads are generated from the barcodes and adapter defined above.
     self.reads = make_fastq_file(barcodes=self.barcodes, adapter=self.adapter)

     # The filter is installed only after the fixtures have been created.
     warnings.simplefilter(action="ignore", category=ResourceWarning)
Esempio n. 6
0
    def test_templates1(self):
        """
        Check the type, subtype and gene templates written by summary_templates.

        A five-row segmentation is summarised into ``out_dir`` and each of the
        three template files is compared with its expected table.
        """
        out_dir = get_temp_dir()

        # Five consecutive 10-long '+' strand segments on chromosome '1'.
        # The last row carries two comma-separated biotypes.
        segmentation_rows = [
            ['1', '.', 'intergenic', '1', '10', '.', '+', '.', 'gene_id ".";'],
            [
                '1', '.', 'UTR3', '11', '20', '.', '+', '.',
                'biotype "mRNA";gene_name "ABC";gene_id "G1";',
            ],
            [
                '1', '.', 'intron', '21', '30', '.', '+', '.',
                'biotype "lncRNA";gene_name "ABC";gene_id "G1";',
            ],
            [
                '1', '.', 'CDS', '31', '40', '.', '+', '.',
                'biotype "mRNA";gene_name "DEF";gene_id "G2";',
            ],
            [
                '1', '.', 'intron', '41', '50', '.', '+', '.',
                'biotype "sRNA,lncRNA";gene_name "DEF"; gene_id "G2";',
            ],
        ]
        segmentation = make_file_from_list(segmentation_rows)
        region.summary_templates(segmentation, out_dir)

        # Per-type lengths: the two introns are summed.
        type_path = os.path.join(out_dir, region.TEMPLATE_TYPE)
        results_type = make_list_from_file(type_path, '\t')
        expected_type = [
            ['CDS', '10'],
            ['UTR3', '10'],
            ['intron', '20'],
            ['intergenic', '10'],
        ]
        self.assertEqual(results_type, expected_type)

        # Per-subtype lengths: intron lncRNA gets 15 and intron sRNA gets 5.
        subtype_path = os.path.join(out_dir, region.TEMPLATE_SUBTYPE)
        results_subtype = make_list_from_file(subtype_path, fields_separator='\t')
        expected_subtype = [
            ['CDS mRNA', '10'],
            ['UTR3 mRNA', '10'],
            ['intron lncRNA', '15'],
            ['intron sRNA', '5'],
            ['intergenic', '10'],
        ]
        self.assertEqual(results_subtype, expected_subtype)

        # Per-gene lengths: the intergenic row has '.' as ID and no name.
        gene_path = os.path.join(out_dir, region.TEMPLATE_GENE)
        results_gene = make_list_from_file(gene_path, fields_separator='\t')
        expected_gene = [
            ['.', '', '10'],
            ['G1', 'ABC', '20'],
            ['G2', 'DEF', '20'],
        ]
        self.assertEqual(results_gene, expected_gene)
Esempio n. 7
0
    def setUp(self):
        """
        Build three FASTQ entries and write them to a temporary FASTQ file.

        Sets ``dir``, ``adapter``, the 5' and 3' barcode lists, the three
        entries (``entry1`` .. ``entry3``), the file name ``fq_fname`` and the
        (closed) writer ``fq_file``.
        """
        warnings.simplefilter("ignore", ResourceWarning)

        self.dir = get_temp_dir()
        self.adapter = 'AAAAAAAAAA'
        # 5' and 3' barcodes are paired by index; '.' is the placeholder used
        # for the first pair's 3' barcode.
        self.barcodes5 = [
            'NNAAAN',
            'NGGGN',
            'NGGGN',
        ]
        self.barcodes3 = [
            '.',
            'NNGGG',
            'NCCC',
        ]
        # Header: early version Illumina header
        # Barcodes: exact match to the barcode set #1
        # Quality length 56 = 6 (barcode) + 40 (sequence) + 10 (adapter).
        self.entry1 = FastqEntry(
            '@header1/1',
            'GGAAAG' + make_sequence(40) + self.adapter,
            '+',
            make_quality_scores(56),
        )
        # Header: contains id and description
        # Barcodes: one mismatch on 5' end for barcode set #2
        # Quality length 60 = 5 + 40 + 5 + 10.
        self.entry2 = FastqEntry(
            '@header2 blah',
            'AGGTA' + make_sequence(40) + 'AAGGG' + self.adapter,
            '+',
            make_quality_scores(60),
        )
        # Header: simple header
        # Barcodes: one mismatch on 3' end for barcode set #3
        # Quality length 59 = 5 + 40 + 4 + 10.
        self.entry3 = FastqEntry(
            '@header3',
            'TGGGT' + make_sequence(40) + 'TACC' + self.adapter,
            '+',
            make_quality_scores(59),
        )

        self.fq_fname = get_temp_file_name(extension='fq')
        self.fq_file = iCount.files.fastq.FastqFile(self.fq_fname, 'wt')
        # Close the writer even if a write raises, so a failing setUp does
        # not leave an open handle behind.
        try:
            for entry in (self.entry1, self.entry2, self.entry3):
                self.fq_file.write(entry)
        finally:
            self.fq_file.close()
Esempio n. 8
0
    def test_e2e(self):
        """
        From raw reads and ENSEMBL annotation to rnamaps.

        Chains segmentation, STAR index building, STAR read mapping and the
        rnamaps analysis, then checks the three rnamaps outputs: the normal
        table, the cross-transcript table and the BAM of "strange" reads.
        """
        # Segment the annotation.
        segmentation = get_temp_file_name(extension='gtf')
        iCount.genomes.segment.get_regions(self.gtf, segmentation, self.fai)

        # STAR: build the genome index, expecting a zero return code.
        star_index = get_temp_dir()
        self.assertEqual(
            iCount.externals.star.build_index(self.fasta, star_index, annotation=self.gtf),
            0,
        )

        # STAR: map the reads against that index, again expecting zero.
        mapping_dir = get_temp_dir()
        self.assertEqual(
            iCount.externals.star.map_reads(
                self.reads, star_index, out_dir=mapping_dir, annotation=self.gtf),
            0,
        )

        # The mapped reads are in the first listed file whose name starts
        # with 'Aligned'.
        aligned = [entry for entry in os.listdir(mapping_dir) if entry.startswith('Aligned')]
        bam = os.path.join(mapping_dir, aligned[0])

        # Run rnamaps with one output target per result kind.
        normal_out = get_temp_file_name(extension='tsv')
        strange_out = get_temp_file_name(extension='bam')
        cross_tr_out = get_temp_file_name(extension='tsv')
        iCount.analysis.rnamaps.run(
            bam, segmentation, normal_out, strange_out, cross_tr_out,
            implicit_handling='split')

        # Normal output. The header row has one more item than the data rows.
        expected_normal = [
            ['RNAmap', 'type', 'position', 'all', 'explicit'],
            ['CDS-CDS', '-40', '0.3333', '0'],
            ['CDS-UTR3', '-25', '0.25', '0'],
            ['CDS-intergenic', '-20', '1', '1'],
            ['CDS-intergenic', '30', '0.5', '0'],
            ['CDS-intergenic', '250', '1', '0'],
            ['CDS-intron', '-40', '0.3333', '0'],
            ['CDS-intron', '-25', '0.25', '0'],
            ['UTR5-CDS', '5', '0.25', '0'],
            ['UTR5-intron', '-10', '1', '1'],
            ['UTR5-intron', '-8', '2', '2'],
            ['UTR5-intron', '10', '0.5', '0'],
            ['UTR5-intron', '13', '1', '0'],
            ['intergenic-CDS', '-70', '0.5', '0'],
            ['intergenic-CDS', '10', '0.3333', '0'],
            ['intergenic-UTR5', '-20', '1', '1'],
            ['intron-CDS', '-40', '0.5', '0'],
            ['intron-CDS', '-37', '1', '0'],
            ['intron-CDS', '5', '0.25', '0'],
        ]
        self.assertEqual(expected_normal, make_list_from_file(normal_out))

        # Cross-transcript output: a header plus exactly one record.
        expected_cross = [
            ['chrom', 'strand', 'xlink', 'second-start', 'end-position', 'read_len'],
            ['1', '+', '234', '236', '284', '50'],
        ]
        self.assertEqual(expected_cross, make_list_from_file(cross_tr_out))

        # Strange output: exactly one read, identified by its name, start
        # position and CIGAR tuples.
        strange_reads = list(pysam.AlignmentFile(strange_out, 'rb'))  # pylint: disable=no-member
        self.assertEqual(len(strange_reads), 1)
        only_read = strange_reads[0]
        self.assertEqual(only_read.query_name, 'name_strange:rbc:GGGG')
        self.assertEqual(only_read.reference_start, 250)
        self.assertEqual(only_read.cigar, [(0, 45), (2, 15), (0, 70)])
Esempio n. 9
0
 def setUp(self):
     """Create a scratch directory and silence ResourceWarning."""
     scratch_dir = get_temp_dir()
     self.tempdir = scratch_dir
     warnings.simplefilter(action="ignore", category=ResourceWarning)
Esempio n. 10
0
 def setUp(self):
     """Create a scratch directory and a '.gtf.gz' temp file name; silence ResourceWarning."""
     scratch_dir = get_temp_dir()
     self.tempdir = scratch_dir
     gtf_name = get_temp_file_name(extension='.gtf.gz')
     self.tmpfile = gtf_name
     warnings.simplefilter(action="ignore", category=ResourceWarning)