def test_e2e(self):
    """
    Run the whole pipeline: from raw reads and ENSEMBL annotation to rnamaps.

    Only the two STAR return codes are asserted explicitly; every other
    step is just expected to finish without raising.
    """
    # Segment the annotation and derive the regions file from it.
    segmentation = get_temp_file_name(extension='gtf')
    regions_dir = get_temp_dir()
    iCount.genomes.segment.get_segments(self.gtf, segmentation, self.fai)
    iCount.genomes.region.make_regions(segmentation, regions_dir)
    regions = os.path.join(regions_dir, iCount.genomes.region.REGIONS_FILE)

    # Build the STAR index.
    index_dir = get_temp_dir()
    build_status = iCount.externals.star.build_index(
        self.fasta, index_dir, annotation=self.gtf)
    self.assertEqual(build_status, 0)

    # Map the reads against the freshly built index.
    mapping_dir = get_temp_dir()
    map_status = iCount.externals.star.map_reads(
        self.reads, index_dir, out_dir=mapping_dir, annotation=self.gtf)
    self.assertEqual(map_status, 0)

    # The mapped-reads file is found by its 'Aligned' file name prefix;
    # the first matching directory entry is used.
    aligned = [
        name for name in os.listdir(mapping_dir)
        if name.startswith('Aligned')
    ]
    bam = os.path.join(mapping_dir, aligned[0])

    # Cross-link sites first, then rnamaps on the single-hit sites.
    sites_single = get_temp_file_name(extension='bed.gz')
    sites_multi = get_temp_file_name(extension='bed.gz')
    skipped = get_temp_file_name(extension='bam')
    iCount.mapping.xlsites.run(bam, sites_single, sites_multi, skipped)
    iCount.analysis.rnamaps.run(sites_single, regions)
def setUp(self):
    """Prepare temporary output paths and the small input files for the tests."""
    warnings.simplefilter("ignore", ResourceWarning)

    # Temporary output locations: two file names and two directories.
    self.tmp1, self.tmp2 = get_temp_file_name(), get_temp_file_name()
    self.dir, self.dir2 = get_temp_dir(), get_temp_dir()

    # Three cross-link rows; note that they are not sorted by position.
    cross_link_rows = [
        ['1', '16', '17', '.', '5', '+'],
        ['1', '14', '15', '.', '5', '+'],
        ['1', '15', '16', '.', '5', '+'],
    ]
    self.cross_links = make_file_from_list(cross_link_rows, extension='bed')

    # A single peak row.
    peak_rows = [
        ['1', '15', '16', '.', '15', '+'],
    ]
    self.peaks = make_file_from_list(peak_rows)

    # Annotation rows differing in feature type, coordinates, strand and
    # biotype. The first and third rows are identical.
    annotation_rows = [
        ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
        ['1', '.', 'ncRNA', '10', '20', '.', '+', '.', 'biotype "A";'],
        ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "A";'],
        ['1', '.', 'CDS', '10', '20', '.', '+', '.', 'biotype "B";'],
        ['1', '.', 'CDS', '10', '20', '.', '-', '.', 'biotype "C";'],
        ['1', '.', 'CDS', '12', '18', '.', '+', '.', 'biotype "A";'],
        ['1', '.', 'CDS', '30', '40', '.', '+', '.', 'biotype "D";'],
    ]
    self.annotation = make_file_from_list(annotation_rows)

    # Minimal gene / transcript / exon hierarchy, all on the same interval.
    gtf_rows = [
        ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_id "A";'],
        ['1', '.', 'transcript', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA";'],
        ['1', '.', 'exon', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA"; exon_number "1";'],
    ]
    self.gtf = make_file_from_list(gtf_rows)

    # BAM fixture with two chromosomes and two segments (NH tags 1 and 11),
    # generated with a fixed random seed.
    bam_spec = {
        'chromosomes': [
            ('1', 3000),
            ('2', 2000),
        ],
        'segments': [
            ('name3:rbc:CCCC:', 0, 0, 100, 20, [(0, 100)], {'NH': 1}),
            ('name4:ABC', 0, 0, 300, 20, [(0, 200)], {'NH': 11}),
        ],
    }
    self.bam = make_bam_file(bam_spec, rnd_seed=0)
def setUp(self):
    """Silence resource warnings; set up an output dir and three header rows."""
    warnings.simplefilter(action="ignore", category=ResourceWarning)
    self.out_dir = get_temp_dir()

    # The three headers differ only in their first column.
    shared_columns = ['Length', 'cDNA #', 'cDNA %']
    self.type_header = ['Type'] + shared_columns
    self.subtype_header = ['Subtype'] + shared_columns
    self.gene_header = ['Gene name (Gene ID)'] + shared_columns
def setUp(self):
    """Create two temporary directories, a genome, reads and an annotation."""
    self.dir = get_temp_dir()
    self.index_dir = get_temp_dir()

    # The genome file is handed to the read generator, so it is made first.
    self.genome = make_fasta_file(num_sequences=2, seq_len=1000)
    self.reads = make_fastq_file(genome=self.genome)

    # Minimal gene / transcript / exon hierarchy, all on the same interval.
    annotation_rows = [
        ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_id "A";'],
        ['1', '.', 'transcript', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA";'],
        ['1', '.', 'exon', '10', '20', '.', '+', '.',
         'gene_id "A"; transcript_id "AA"; exon_number "1";'],
    ]
    self.annotation = make_file_from_list(annotation_rows)

    warnings.simplefilter(action="ignore", category=ResourceWarning)
def setUp(self):
    """Create a temporary directory and a FASTQ file built from barcodes + adapter."""
    self.dir = get_temp_dir()

    adapter = 'CCCCCCCCC'
    barcodes = [
        'NNNGGTTNN',
        'NNNTTGTNN',
        'NNNCAATNN',
        'NNNACCTNN',
        'NNNGGCGNN',
    ]
    self.adapter = adapter
    self.barcodes = barcodes

    # The same barcodes and adapter are handed to the reads generator.
    self.reads = make_fastq_file(barcodes=barcodes, adapter=adapter)

    warnings.simplefilter(action="ignore", category=ResourceWarning)
def test_templates1(self):
    """Check the type, subtype and gene templates written for a tiny segmentation."""
    out_dir = get_temp_dir()

    # Five consecutive 10-base segments on chromosome 1. The last intron
    # lists two biotypes; the expected subtype table below splits its
    # length between them (lncRNA 10 + 5, sRNA 5).
    segmentation_rows = [
        ['1', '.', 'intergenic', '1', '10', '.', '+', '.', 'gene_id ".";'],
        ['1', '.', 'UTR3', '11', '20', '.', '+', '.',
         'biotype "mRNA";gene_name "ABC";gene_id "G1";'],
        ['1', '.', 'intron', '21', '30', '.', '+', '.',
         'biotype "lncRNA";gene_name "ABC";gene_id "G1";'],
        ['1', '.', 'CDS', '31', '40', '.', '+', '.',
         'biotype "mRNA";gene_name "DEF";gene_id "G2";'],
        ['1', '.', 'intron', '41', '50', '.', '+', '.',
         'biotype "sRNA,lncRNA";gene_name "DEF"; gene_id "G2";'],
    ]
    segmentation = make_file_from_list(segmentation_rows)

    region.summary_templates(segmentation, out_dir)

    # Template by region type.
    type_path = os.path.join(out_dir, region.TEMPLATE_TYPE)
    results_type = make_list_from_file(type_path, '\t')
    expected_type = [
        ['CDS', '10'],
        ['UTR3', '10'],
        ['intron', '20'],
        ['intergenic', '10'],
    ]
    self.assertEqual(results_type, expected_type)

    # Template by region subtype (type + biotype).
    subtype_path = os.path.join(out_dir, region.TEMPLATE_SUBTYPE)
    results_subtype = make_list_from_file(subtype_path, fields_separator='\t')
    expected_subtype = [
        ['CDS mRNA', '10'],
        ['UTR3 mRNA', '10'],
        ['intron lncRNA', '15'],
        ['intron sRNA', '5'],
        ['intergenic', '10'],
    ]
    self.assertEqual(results_subtype, expected_subtype)

    # Template by gene: gene id, gene name, length.
    gene_path = os.path.join(out_dir, region.TEMPLATE_GENE)
    results_gene = make_list_from_file(gene_path, fields_separator='\t')
    expected_gene = [
        ['.', '', '10'],
        ['G1', 'ABC', '20'],
        ['G2', 'DEF', '20'],
    ]
    self.assertEqual(results_gene, expected_gene)
def setUp(self):
    """
    Build three FASTQ entries and write them to a temporary FASTQ file.

    Sets ``self.dir``, ``self.adapter``, the 5' / 3' barcode lists,
    ``self.entry1`` .. ``self.entry3``, ``self.fq_fname`` and
    ``self.fq_file`` (already closed when this method returns).
    """
    warnings.simplefilter("ignore", ResourceWarning)
    self.dir = get_temp_dir()
    self.adapter = 'AAAAAAAAAA'

    # 5' and 3' barcodes are paired by position; '.' is used as the 3'
    # entry of the first set.
    self.barcodes5 = [
        'NNAAAN',
        'NGGGN',
        'NGGGN',
    ]
    self.barcodes3 = [
        '.',
        'NNGGG',
        'NCCC',
    ]

    # In every entry the quality string length equals the length of the
    # literal parts plus 40 (assumes make_sequence(40) yields 40 bases).

    # Header: early version Illumina header
    # Barcodes: exact match to the barcode set #1
    self.entry1 = FastqEntry(
        '@header1/1',
        'GGAAAG' + make_sequence(40) + self.adapter,
        '+',
        make_quality_scores(56),
    )

    # Header: contains id and description
    # Barcodes: one mismatch on 5' end for barcode set #2
    self.entry2 = FastqEntry(
        '@header2 blah',
        'AGGTA' + make_sequence(40) + 'AAGGG' + self.adapter,
        '+',
        make_quality_scores(60),
    )

    # Header: simple header
    # Barcodes: one mismatch on 3' end for barcode set #3
    self.entry3 = FastqEntry(
        '@header3',
        'TGGGT' + make_sequence(40) + 'TACC' + self.adapter,
        '+',
        make_quality_scores(59),
    )

    self.fq_fname = get_temp_file_name(extension='fq')
    self.fq_file = iCount.files.fastq.FastqFile(self.fq_fname, 'wt')
    # Close the handle even when a write fails, so a broken fixture does
    # not leave the temporary file open.
    try:
        for entry in (self.entry1, self.entry2, self.entry3):
            self.fq_file.write(entry)
    finally:
        self.fq_file.close()
def test_e2e(self):
    """
    Run the whole pipeline: from raw reads and ENSEMBL annotation to rnamaps.

    The three rnamaps outputs (normal, cross-transcript and strange) are
    each compared against fixed expected content.
    """
    # Segment the annotation.
    segmentation = get_temp_file_name(extension='gtf')
    iCount.genomes.segment.get_regions(self.gtf, segmentation, self.fai)

    # Build the STAR index.
    index_dir = get_temp_dir()
    build_status = iCount.externals.star.build_index(
        self.fasta, index_dir, annotation=self.gtf)
    self.assertEqual(build_status, 0)

    # Map the reads against the freshly built index.
    mapping_dir = get_temp_dir()
    map_status = iCount.externals.star.map_reads(
        self.reads, index_dir, out_dir=mapping_dir, annotation=self.gtf)
    self.assertEqual(map_status, 0)

    # The mapped-reads file is found by its 'Aligned' file name prefix;
    # the first matching directory entry is used.
    aligned = [
        name for name in os.listdir(mapping_dir)
        if name.startswith('Aligned')
    ]
    bam = os.path.join(mapping_dir, aligned[0])

    # Run rnamaps, writing each kind of output to its own temporary file.
    normal_out = get_temp_file_name(extension='tsv')
    strange_out = get_temp_file_name(extension='bam')
    cross_tr_out = get_temp_file_name(extension='tsv')
    iCount.analysis.rnamaps.run(
        bam, segmentation, normal_out, strange_out, cross_tr_out,
        implicit_handling='split')

    # Normal output.
    # NOTE(review): the header row has five cells while data rows have
    # four - presumably a two-word column name gets split when the file
    # is read back; confirm against make_list_from_file.
    expected_normal = [
        ['RNAmap', 'type', 'position', 'all', 'explicit'],
        ['CDS-CDS', '-40', '0.3333', '0'],
        ['CDS-UTR3', '-25', '0.25', '0'],
        ['CDS-intergenic', '-20', '1', '1'],
        ['CDS-intergenic', '30', '0.5', '0'],
        ['CDS-intergenic', '250', '1', '0'],
        ['CDS-intron', '-40', '0.3333', '0'],
        ['CDS-intron', '-25', '0.25', '0'],
        ['UTR5-CDS', '5', '0.25', '0'],
        ['UTR5-intron', '-10', '1', '1'],
        ['UTR5-intron', '-8', '2', '2'],
        ['UTR5-intron', '10', '0.5', '0'],
        ['UTR5-intron', '13', '1', '0'],
        ['intergenic-CDS', '-70', '0.5', '0'],
        ['intergenic-CDS', '10', '0.3333', '0'],
        ['intergenic-UTR5', '-20', '1', '1'],
        ['intron-CDS', '-40', '0.5', '0'],
        ['intron-CDS', '-37', '1', '0'],
        ['intron-CDS', '5', '0.25', '0'],
    ]
    self.assertEqual(expected_normal, make_list_from_file(normal_out))

    # Cross-transcript output: a header plus a single record.
    expected_cross = [
        ['chrom', 'strand', 'xlink', 'second-start', 'end-position',
         'read_len'],
        ['1', '+', '234', '236', '284', '50'],
    ]
    self.assertEqual(expected_cross, make_list_from_file(cross_tr_out))

    # Strange output: exactly one read, checked by name, start and CIGAR.
    strange_reads = list(pysam.AlignmentFile(strange_out, 'rb'))  # pylint: disable=no-member
    self.assertEqual(len(strange_reads), 1)
    only_read = strange_reads[0]
    self.assertEqual(only_read.query_name, 'name_strange:rbc:GGGG')
    self.assertEqual(only_read.reference_start, 250)
    self.assertEqual(only_read.cigar, [(0, 45), (2, 15), (0, 70)])
def setUp(self):
    """Create a temporary directory and silence ResourceWarning."""
    self.tempdir = get_temp_dir()
    warnings.simplefilter(action="ignore", category=ResourceWarning)
def setUp(self):
    """Create a temporary directory and a temporary file name; silence ResourceWarning."""
    self.tempdir = get_temp_dir()
    # NOTE(review): the extension is given with a leading dot here;
    # confirm get_temp_file_name does not add another one.
    self.tmpfile = get_temp_file_name(extension='.gtf.gz')
    warnings.simplefilter(action="ignore", category=ResourceWarning)