def test_limits_downstream(self):
    """Landmarks with too short downstream segment should not be used.

    Two scenarios are checked and both must yield an empty landmarks file:

    * ``exon-intron`` on the ``+`` strand, where the segment downstream of
      the landmark is the intron spanning 201-350;
    * ``intron-exon`` on the ``-`` strand, where the segment downstream of
      the landmark (in transcript direction) is the CDS spanning 151-200.
    """
    # Positive strand: the intron after the CDS is the downstream segment.
    regions = make_file_from_list([
        ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
        ['chr1', '.', 'intron', '201', '350', '.', '+', '.', 'gene_name "A";'],
    ])
    fn = rnamaps.make_landmarks_file(regions, 'exon-intron')
    self.assertEqual(make_list_from_file(fn), [])

    # Negative strand: the CDS lies downstream of the intron.
    regions = make_file_from_list([
        ['chr1', '.', 'CDS', '151', '200', '.', '-', '.', 'gene_name "A";'],
        ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
    ])
    fn = rnamaps.make_landmarks_file(regions, 'intron-exon')
    self.assertEqual(make_list_from_file(fn), [])
def test_only_barcode5_1_mismatch(self):
    """Demultiplex using only the first two ``barcodes5`` with one mismatch allowed."""
    demultiplex.run(
        self.fq_fname,
        self.adapter,
        self.barcodes5[:2],
        mismatches=1,
        out_dir=self.dir,
    )

    # Output for the first barcode: a single entry is checked.
    name = 'demux_{}.fastq.gz'.format(self.barcodes5[0])
    records = make_list_from_file(os.path.join(self.dir, name))
    self.assertEqual(records[0], ['@header1:rbc:GGG/1'])
    self.assertEqual(records[1], [self.entry1.seq[6:-10]])
    self.assertEqual(records[3], [self.entry1.qual[6:-10]])

    # Output for the second barcode: two consecutive entries are checked.
    name = 'demux_{}.fastq.gz'.format(self.barcodes5[1])
    records = make_list_from_file(os.path.join(self.dir, name))
    self.assertEqual(records[0], ['@header2:rbc:AA'])
    self.assertEqual(records[1], [self.entry2.seq[5:-10]])
    self.assertEqual(records[3], [self.entry2.qual[5:-10]])
    self.assertEqual(records[4], ['@header3:rbc:TT'])
    self.assertEqual(records[5], [self.entry3.seq[5:-10]])
    self.assertEqual(records[7], [self.entry3.qual[5:-10]])

    # The "nomatch5" output is expected to be empty.
    name = 'demux_{}.fastq.gz'.format('nomatch5')
    records = make_list_from_file(os.path.join(self.dir, name))
    self.assertEqual(records, [])
def test_basic(self):
    """Check the single landmark produced for a CDS/intron pair on each strand."""
    # Positive strand, exon-intron landmark.
    plus_rows = [
        ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
        ['chr1', '.', 'intron', '201', '351', '.', '+', '.', 'gene_name "A";'],
    ]
    landmarks_fn = rnamaps.make_landmarks_file(make_file_from_list(plus_rows), 'exon-intron')
    self.assertEqual(
        make_list_from_file(landmarks_fn),
        [['chr1', '200', '201', 'A', '.', '+']],
    )

    # Negative strand, intron-exon landmark.
    minus_rows = [
        ['chr1', '.', 'CDS', '150', '200', '.', '-', '.', 'gene_name "A";'],
        ['chr1', '.', 'intron', '201', '351', '.', '-', '.', 'gene_name "A";'],
    ]
    landmarks_fn = rnamaps.make_landmarks_file(make_file_from_list(minus_rows), 'intron-exon')
    self.assertEqual(
        make_list_from_file(landmarks_fn),
        [['chr1', '199', '200', 'A', '.', '-']],
    )
def test_overwrite(self):
    """Run cutadapt with ``overwrite=True`` and check the reads file itself is trimmed.

    The first sequence of ``self.reads`` is read before and after the run.
    The sequence read before must end with the adapter, and the one read
    after must equal it with the adapter removed.
    """
    original_seq = make_list_from_file(self.reads)[1][0]
    return_code = cutadapt.run(self.reads, self.adapter, overwrite=True)
    # Same file is read again: with overwrite=True no separate output is given.
    trimmed_seq = make_list_from_file(self.reads)[1][0]

    self.assertTrue(original_seq.endswith(self.adapter))
    self.assertEqual(original_seq[:-len(self.adapter)], trimmed_seq)
    self.assertEqual(return_code, 0)
def test_simple(self):
    """Trim the adapter into ``self.tmp`` and compare first sequences of input and output."""
    exit_status = cutadapt.run(
        self.reads,
        self.adapter,
        reads_trimmed=self.tmp,
        qual_trim=0,
        minimum_length=20,
    )

    before = make_list_from_file(self.reads)[1][0]
    after = make_list_from_file(self.tmp)[1][0]

    # The untrimmed read ends with the adapter; the trimmed one lacks it.
    self.assertTrue(before.endswith(self.adapter))
    self.assertEqual(before[:-len(self.adapter)], after)
    self.assertEqual(exit_status, 0)
def get_summary_reports(self, annotation, cross_links):
    """
    Run ``summary_templates`` and ``summary_reports`` on list-based input.

    Both inputs are turned into files with ``make_file_from_list`` and all
    output goes to ``self.out_dir``.

    Returns a list with the tab-split content of the type, subtype and gene
    summary files, in that order.
    """
    annotation_fn = make_file_from_list(annotation)
    cross_links_fn = make_file_from_list(cross_links)

    segment.summary_templates(annotation_fn, self.out_dir)
    summary.summary_reports(annotation_fn, cross_links_fn, self.out_dir, self.out_dir)

    report_names = (
        segment.SUMMARY_TYPE,
        segment.SUMMARY_SUBTYPE,
        segment.SUMMARY_GENE,
    )
    return [
        make_list_from_file(os.path.join(self.out_dir, report_name), '\t')
        for report_name in report_names
    ]
def test_templates1(self):
    """Check the type, subtype and gene templates written by ``region.summary_templates``."""
    out_dir = get_temp_dir()
    rows = [
        ['1', '.', 'intergenic', '1', '10', '.', '+', '.', 'gene_id ".";'],
        ['1', '.', 'UTR3', '11', '20', '.', '+', '.',
         'biotype "mRNA";gene_name "ABC";gene_id "G1";'],
        ['1', '.', 'intron', '21', '30', '.', '+', '.',
         'biotype "lncRNA";gene_name "ABC";gene_id "G1";'],
        ['1', '.', 'CDS', '31', '40', '.', '+', '.',
         'biotype "mRNA";gene_name "DEF";gene_id "G2";'],
        ['1', '.', 'intron', '41', '50', '.', '+', '.',
         'biotype "sRNA,lncRNA";gene_name "DEF"; gene_id "G2";'],
    ]
    segmentation = make_file_from_list(rows)
    region.summary_templates(segmentation, out_dir)

    # Per-type template.
    type_path = os.path.join(out_dir, region.TEMPLATE_TYPE)
    results_type = make_list_from_file(type_path, '\t')
    self.assertEqual(results_type, [
        ['CDS', '10'],
        ['UTR3', '10'],
        ['intron', '20'],
        ['intergenic', '10'],
    ])

    # Per-subtype template.
    subtype_path = os.path.join(out_dir, region.TEMPLATE_SUBTYPE)
    results_subtype = make_list_from_file(subtype_path, fields_separator='\t')
    self.assertEqual(results_subtype, [
        ['CDS mRNA', '10'],
        ['UTR3 mRNA', '10'],
        ['intron lncRNA', '15'],
        ['intron sRNA', '5'],
        ['intergenic', '10'],
    ])

    # Per-gene template.
    gene_path = os.path.join(out_dir, region.TEMPLATE_GENE)
    results_gene = make_list_from_file(gene_path, fields_separator='\t')
    self.assertEqual(results_gene, [
        ['.', '', '10'],
        ['G1', 'ABC', '20'],
        ['G2', 'DEF', '20'],
    ])
def test_explicit_whole_in(self):
    """
    Explicit case: the whole read lies in a single transcript and crosses
    the exon-intron landmark.

    Three reads are provided, giving two different cross-links. One of the
    cross-links has two distinct randomers.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 140, 255, [(0, 50)], {'NH': 1}),
        ('name2:rbc:AAAA', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
        ('name2:rbc:CCCC', 0, 0, 142, 255, [(0, 50)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['UTR5-intron', '-10', '1', '1'],
        ['UTR5-intron', '-8', '2', '2'],
    ]

    rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_implicit_exons(self):
    """
    Implicit case: the whole read lies in a single transcript and a single
    segment. The segment is of EXON_TYPE and sits in the "middle" of the
    transcript. Only one read is provided.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 205, 255, [(0, 20)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['CDS-UTR3', '-25', '0.25', '0'],
        ['CDS-intron', '-25', '0.25', '0'],
        ['UTR5-CDS', '5', '0.25', '0'],
        ['intron-CDS', '5', '0.25', '0'],
    ]

    rnamaps.run(
        bam, self.gtf, self.out, self.strange, self.cross_tr,
        mismatches=1,
        implicit_handling='split',
    )
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_implicit_inter_tr(self):
    """
    Implicit case: the whole read lies in a single transcript and a single
    segment, but that segment borders on intergenic (downstream).
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 610, 255, [(0, 30)], {'NH': 1}),
    ]
    bam_spec = {'chromosomes': [('1', 1000)], 'segments': reads}
    bam = make_bam_file(bam_spec, rnd_seed=0)

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['CDS-CDS', '-40', '0.3333', '0'],
        ['CDS-intron', '-40', '0.3333', '0'],
        ['intergenic-CDS', '10', '0.3333', '0'],
    ]

    rnamaps.run(
        bam, self.gtf, self.out, self.strange, self.cross_tr,
        mismatches=1,
        implicit_handling='split',
    )
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_complement(self):
    """Check the intergenic intervals ``segment._complement`` returns for the ``+`` strand."""
    chrom_sizes = [
        ['1', '2000'],
        ['2', '1000'],
        ['MT', '500'],
    ]
    genome_file = make_file_from_list(chrom_sizes, bedtool=False)

    genes = list_to_intervals([
        ['1', '.', 'gene1', '200', '400', '.', '+', '.', '.'],
        ['1', '.', 'gene2', '300', '600', '.', '+', '.', '.'],
        ['1', '.', 'gene3', '200', '500', '.', '+', '.', '.'],
        ['2', '.', 'gene4', '100', '200', '.', '+', '.', '.'],
        ['2', '.', 'gene5', '100', '300', '.', '-', '.', '.'],
    ])

    complement_fn = segment._complement(genes, genome_file, '+')
    complement = make_list_from_file(complement_fn, fields_separator='\t')

    empty_col8 = 'ID "inter%s"; gene_id "."; transcript_id ".";'
    # (chromosome, start, end, ID suffix) of every expected intergenic interval.
    gaps = [
        ('1', '1', '199', "P00000"),
        ('1', '601', '2000', "P00001"),
        ('2', '1', '99', "P00002"),
        ('2', '201', '1000', "P00003"),
        ('MT', '1', '500', "P00004"),
    ]
    expected = [
        [chrom, '.', 'intergenic', start, end, '.', '+', '.', empty_col8 % suffix]
        for chrom, start, end, suffix in gaps
    ]

    self.assertEqual(complement, expected)
def test_clusters(self):
    """Run ``clusters.run`` with ``dist=3`` and ``slop=2`` and compare the output rows."""
    sites = [
        ['1', '1', '2', '.', '1', '+'],
        ['1', '2', '3', '.', '1', '+'],
        ['1', '3', '4', '.', '1', '+'],
        ['1', '4', '5', '.', '2', '+'],
        ['1', '4', '5', '.', '1', '-'],
        ['1', '5', '6', '.', '1', '+'],
        ['1', '6', '7', '.', '1', '-'],
        ['1', '7', '8', '.', '1', '-'],
        ['1', '10', '11', '.', '1', '+'],
        ['1', '11', '12', '.', '2', '+'],
        ['1', '12', '13', '.', '1', '+'],
    ]
    peaks = [
        ['1', '4', '5', 'cl1', '1', '+'],
        ['1', '4', '5', 'cl2', '1', '-'],
        ['1', '5', '6', 'cl3', '1', '+'],
        ['1', '11', '12', 'cl4', '2', '+'],
    ]
    fin_sites = make_file_from_list(sites)
    fin_peaks = make_file_from_list(peaks)
    fout_clusters = get_temp_file_name()

    clusters.run(fin_sites, fin_peaks, fout_clusters, dist=3, slop=2)

    observed = make_list_from_file(fout_clusters, fields_separator='\t')
    self.assertEqual(
        [
            ['1', '2', '6', 'cl1,cl3', '5', '+'],
            ['1', '4', '7', 'cl2', '2', '-'],
            ['1', '10', '13', 'cl4', '4', '+'],
        ],
        observed,
    )
def test_bed2bedgraph_params(self):
    """
    Convert with every track option customised and check the track line.
    """
    track_options = {
        'name': 'Sample name',
        'description': 'A long and detailed description.',
        'visibility': 'full',
        'priority': 20,
        'color': '256,0,0',
        'alt_color': '0,256,0',
        'max_height_pixels': '100:50:0',
    }
    iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph, **track_options)

    track_line = (
        'track type=bedGraph name="Sample name" description="A long and detailed description."'
        ' visibility=full priority=20 color=256,0,0 altColor=0,256,0 maxHeightPixels=100:50:0'
    )
    data_rows = [
        ['1', '4', '5', '+5'],
        ['1', '5', '6', '+1'],
        ['1', '5', '6', '-1'],
        ['2', '5', '6', '+3'],
    ]
    expected = [[track_line]] + data_rows

    result = make_list_from_file(self.bedgraph, fields_separator='\t')
    self.assertEqual(result, expected)
def template(cross_links, annotation, subtype='biotype', excluded_types=None):
    """
    Run ``annotate.annotate_cross_links`` on list-based input.

    The content of both input files is given as lists. Temporary files are
    created from them on the fly, so tests need neither many small fixture
    files nor one large shared file (which would hurt test isolation).

    Parameters
    ----------
    cross_links : list
        List representation of cross-links file.
    annotation : list
        List representation of annotation file.
    subtype : str
        Passed unchanged to ``annotate_cross_links``.
    excluded_types : list or None
        Passed unchanged to ``annotate_cross_links``.

    Returns
    -------
    list
        List representation of the annotated output file.

    """
    xlinks_fn = make_file_from_list(cross_links, extension='bed.gz')
    annotation_fn = make_file_from_list(annotation, extension='gtf.gz')
    result_fn = get_temp_file_name(extension='bed.gz')

    annotate.annotate_cross_links(
        annotation_fn,
        xlinks_fn,
        result_fn,
        subtype=subtype,
        excluded_types=excluded_types,
    )

    return make_list_from_file(result_fn, fields_separator='\t')
def test_negative_strand(self):
    """
    Run RNA-maps on an annotation whose intervals are all put on the
    negative strand, using a single read with flag 16.

    NOTE(review): the previous description ("read in a single segment
    bordering on intergenic") looked copied from another test; confirm
    the intended scenario against the ``self.gtf_data`` fixture.
    """
    # Replace the strand column (index 6) of every annotation row with '-'.
    gtf_neg_data = []
    for row in intervals_to_list(self.gtf_data):
        gtf_neg_data.append(row[:6] + ['-'] + row[7:])
    gtf_neg = make_file_from_list(gtf_neg_data)

    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 16, 0, 549, 255, [(0, 30)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['CDS-intergenic', '20', '0.5', '0'],
        ['intergenic-CDS', '-80', '0.5', '0'],
    ]

    rnamaps.run(
        bam, gtf_neg, self.out, self.strange, self.cross_tr,
        mismatches=1,
        implicit_handling='split',
    )
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_normalisation(self):
    """Check the rows ``rnamaps.make_normalization`` writes for ``self.gtf``."""
    norm_file = get_temp_file_name(extension='txt')
    rnamaps.make_normalization(self.gtf, norm_file)

    # Each RNA-map type is expected at a run of consecutive distances, every
    # one of them with a segment count of '1'.
    # NOTE(review): the 'integrenic' spelling presumably mirrors the fixture
    # behind self.gtf; confirm before changing it.
    distance_ranges = [
        ('CDS-UTR3', range(-1, 2)),
        ('CDS-intron', range(-1, 3)),
        ('integrenic-CDS', range(-2, 1)),
        ('intron-UTR3', range(-3, 2)),
        ('intron-ncRNA', range(-1, 1)),
        ('ncRNA-integrenic', range(-1, 2)),
        ('ncRNA-intron', range(-2, 1)),
        ('ncRNA-ncRNA', range(-2, 1)),
    ]
    expected = [['RNAmap_type', 'distance', 'segments']]
    for map_type, distances in distance_ranges:
        for distance in distances:
            expected.append([map_type, str(distance), '1'])

    self.assertEqual(expected, make_list_from_file(norm_file))
def test_implicit_whole_in(self):
    """
    Implicit case: the whole read lies in a single transcript and a single
    segment, and that segment is the "middle" segment of the transcript.

    Three reads are provided, giving two different cross-links. One of the
    cross-links has two distinct randomers.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 160, 255, [(0, 30)], {'NH': 1}),
        ('name2:rbc:CCCC', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
        ('name2:rbc:GGGG', 0, 0, 163, 255, [(0, 30)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['UTR5-intron', '10', '1', '0'],
        ['UTR5-intron', '13', '2', '0'],
    ]

    rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_cross_transcript_read(self):
    """
    A read lying half in a transcript region and half in intergenic is
    reported in the ``self.cross_tr`` file.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 235, 255, [(0, 50)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['chrom', 'strand', 'xlink', 'second-start', 'end-position', 'read_len']
    expected = [
        header,
        ['1', '+', '234', '0', '284', '50'],
    ]

    rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
    # This test inspects the cross-transcript report, not the main output.
    observed = make_list_from_file(self.cross_tr)
    self.assertEqual(expected, observed)
def test_implicit_intergenic(self):
    """
    Implicit case: the whole read lies in intergenic.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 530, 255, [(0, 30)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['CDS-intergenic', '30', '0.5', '0'],
        ['intergenic-CDS', '-70', '0.5', '0'],
    ]

    rnamaps.run(
        bam, self.gtf, self.out, self.strange, self.cross_tr,
        mismatches=1,
        implicit_handling='split',
    )
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_explicit_intergenic_right(self):
    """
    Explicit case: the read lies half in a transcript region and half in
    intergenic.
    """
    reads = [
        # (qname, flag, refname, pos, mapq, cigar, tags)
        ('name2:rbc:CCCC', 0, 0, 480, 255, [(0, 50)], {'NH': 1}),
    ]
    bam = make_bam_file({'chromosomes': [('1', 1000)], 'segments': reads})

    header = ['RNAmap', 'type', 'position', 'all', 'explicit']
    expected = [
        header,
        ['CDS-intergenic', '-20', '1', '1'],
    ]

    rnamaps.run(bam, self.gtf, self.out, self.strange, self.cross_tr, mismatches=1)
    observed = make_list_from_file(self.out)
    self.assertEqual(expected, observed)
def test_run(self):
    """Run ``peaks.run`` on a small annotation and check both the peaks and scores output."""
    annotation_rows = [
        ['1', '.', 'gene', '10', '20', '.', '+', '.', 'gene_name "A"; gene_id "1";'],
        ['1', '.', 'transcript', '10', '20', '.', '+', '.', 'gene_name "B"; gene_id "1";'],
        ['2', '.', 'CDS', '10', '20', '.', '+', '.', 'gene_name "C"; gene_id "1";'],
    ]
    site_rows = [
        ['1', '14', '15', '.', '3', '+'],
        ['1', '16', '17', '.', '5', '+'],
        ['2', '16', '17', '.', '5', '+'],
    ]
    fin_annotation = make_file_from_list(annotation_rows)
    fin_sites = make_file_from_list(site_rows)
    fout_peaks = get_temp_file_name(extension='.bed.gz')
    fout_scores = get_temp_file_name(extension='.tsv.gz')

    peaks.run(fin_annotation, fin_sites, fout_peaks, scores=fout_scores)

    out_peaks = make_list_from_file(fout_peaks, fields_separator='\t')
    # The first row of the scores file is a header and is not compared.
    out_scores = make_list_from_file(fout_scores, fields_separator='\t')[1:]

    self.assertEqual(out_peaks, [
        ['1', '14', '15', 'A-1', '3', '+'],
        ['1', '16', '17', 'A-1', '5', '+'],
    ])
    self.assertEqual(out_scores, [
        ['1', '14', '+', 'A', '1', '3', '8', '0.036198'],
        ['1', '16', '+', 'A', '1', '5', '8', '0.036198'],
        ['2', '16', '+', 'not_annotated', 'not_annotated', '5', 'not_calculated', '1'],
    ])
def test_basic(self):
    """Check the border intervals ``region.construct_borders`` builds from overlapping segments."""
    attrs = 'biotype "A"; gene_name "X";'
    segmentation = [
        # Transcript #1
        ['1', '.', 'ncRNA', '1', '10', '.', '+', '.', attrs],
        ['1', '.', 'intron', '11', '20', '.', '+', '.', attrs],
        ['1', '.', 'CDS', '21', '30', '.', '+', '.', attrs],
        ['1', '.', 'UTR3', '31', '40', '.', '+', '.', attrs],
        # Transcript #2
        ['1', '.', 'CDS', '5', '14', '.', '+', '.', attrs],
        ['1', '.', 'intron', '15', '24', '.', '+', '.', attrs],
        ['1', '.', 'CDS', '25', '34', '.', '+', '.', attrs],
        # Also negative strand:
        ['1', '.', 'CDS', '3', '32', '.', '-', '.', attrs],
    ]
    expected = [
        ['1', '0', '4', '.', '.', '+'],
        ['1', '4', '10', '.', '.', '+'],
        ['1', '10', '14', '.', '.', '+'],
        ['1', '14', '20', '.', '.', '+'],
        ['1', '20', '24', '.', '.', '+'],
        ['1', '24', '30', '.', '.', '+'],
        ['1', '30', '34', '.', '.', '+'],
        ['1', '34', '40', '.', '.', '+'],
        ['1', '2', '32', '.', '.', '-'],
    ]

    def by_position(row):
        """Order rows by chrom, strand, start, stop."""
        return row[0], row[-1], int(row[1]), int(row[2])

    segmentation_file = make_file_from_list(segmentation)
    borders_file = region.construct_borders(BedTool(segmentation_file))
    results = make_list_from_file(borders_file, fields_separator='\t')

    self.assertEqual(expected, sorted(results, key=by_position))
def test_all_good(self):
    """Run ``segment.get_regions`` on a one-gene annotation and compare all output intervals."""
    gene_attrs = 'gene_id "G2";'
    transcript_attrs = 'gene_id "G2"; transcript_id "T3";'
    gtf_in_data = list_to_intervals([
        ['1', '.', 'gene', '400', '500', '.', '+', '.', gene_attrs],
        ['1', '.', 'transcript', '400', '500', '.', '+', '.', transcript_attrs],
        ['1', '.', 'exon', '400', '430', '.', '+', '.',
         'gene_id "G2"; transcript_id "T3"; exon_number "1"'],
        ['1', '.', 'CDS', '410', '430', '.', '+', '.', transcript_attrs],
        ['1', '.', 'exon', '470', '500', '.', '+', '.',
         'gene_id "G2"; transcript_id "T3"; exon_number "2"'],
        ['1', '.', 'CDS', '470', '490', '.', '+', '.', transcript_attrs],
    ])
    gtf_in_file = make_file_from_list(intervals_to_list(gtf_in_data))

    # Only the name of the output file is needed; the handle is closed at once.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as handle:
        gtf_out_name = handle.name

    genome_file = make_file_from_list(
        [
            ['1', '2000'],
            ['MT', '500'],
        ],
        bedtool=False,
    )

    regions_fn = segment.get_regions(gtf_in_file, gtf_out_name, genome_file)
    gtf_out_data = list_to_intervals(
        make_list_from_file(regions_fn, fields_separator='\t'))

    no_gene = 'gene_id "."; transcript_id ".";'
    in_transcript = 'gene_id "G2";transcript_id "T3"; biotype ".";'
    expected = list_to_intervals([
        ['1', '.', 'intergenic', '1', '399', '.', '+', '.', no_gene],
        ['1', '.', 'intergenic', '1', '2000', '.', '-', '.', no_gene],
        ['1', '.', 'transcript', '400', '500', '.', '+', '.', in_transcript],
        ['1', '.', 'UTR5', '400', '409', '.', '+', '.',
         'gene_id "G2";exon_number "1";transcript_id "T3"; biotype ".";'],
        ['1', '.', 'gene', '400', '500', '.', '+', '.', 'gene_id "G2"; biotype "[.]";'],
        ['1', '.', 'CDS', '410', '430', '.', '+', '.', in_transcript],
        ['1', '.', 'intron', '431', '469', '.', '+', '.',
         'gene_id "G2"; transcript_id "T3"; biotype ".";'],
        ['1', '.', 'CDS', '470', '490', '.', '+', '.', in_transcript],
        ['1', '.', 'UTR3', '491', '500', '.', '+', '.',
         'gene_id "G2";exon_number "2";transcript_id "T3"; biotype ".";'],
        ['1', '.', 'intergenic', '501', '2000', '.', '+', '.', no_gene],
        ['MT', '.', 'intergenic', '1', '500', '.', '+', '.', no_gene],
        ['MT', '.', 'intergenic', '1', '500', '.', '-', '.', no_gene],
    ])

    self.assertEqual(expected, gtf_out_data)
def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list-based input and return its output as a list.

    Parameters
    ----------
    data : list
        One list representation of a BED file per element.

    Returns
    -------
    list
        Tab-split content of the file whose name ``merge_bed`` returns.

    """
    files = [make_file_from_list(file_) for file_ in data]

    # Only the file name is needed. Close the handle right away instead of
    # leaving an open file object behind for the garbage collector.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        out_file = handle.name

    return make_list_from_file(merge_bed(out_file, files), fields_separator='\t')
def merge_bed_wrapper(data):
    """
    Run ``merge_bed`` on list-based input and return the merged file as a list.

    Each element of ``data`` is written to its own file with
    ``make_file_from_list``; the merged output is read back tab-split.
    """
    input_files = [make_file_from_list(rows) for rows in data]
    merged_fn = get_temp_file_name()
    merge_bed(merged_fn, input_files)
    return make_list_from_file(merged_fn, fields_separator='\t')
def test_bed2bedgraph(self):
    """Convert with default options and check the default track line and data rows."""
    iCount.files.bedgraph.bed2bedgraph(self.bed, self.bedgraph)

    track_line = 'track type=bedGraph name="User Track" description="User Supplied Track"'
    data_rows = [
        ['1', '4', '5', '+5'],
        ['1', '5', '6', '+1'],
        ['1', '5', '6', '-1'],
        ['2', '5', '6', '+3'],
    ]
    expected = [[track_line]] + data_rows

    result = make_list_from_file(self.bedgraph, fields_separator='\t')
    self.assertEqual(result, expected)
def test_basic(self):
    """Check that ``region.merge_regions`` writes the expected merged intervals.

    The attribute column is compared order-insensitively, because the order
    of attributes in the output can be arbitrary.
    """
    # seg is composition of BED6 and GTF interval:
    nonmerged = make_file_from_list([
        ['1', '.', 'UTR3', '1', '10', '.', '+', '.', 'biotype "lncRNA";gene_id "id1";'],
        ['1', '.', 'UTR3', '11', '20', '.', '+', '.', 'biotype "lncRNA";gene_id "id1";'],
        ['1', '.', 'UTR3', '21', '30', '.', '+', '.', 'biotype "lncRNA";gene_id "id2";'],
        ['1', '.', 'UTR3', '31', '40', '.', '+', '.', 'biotype "lncRNA";gene_id "id1";'],
        ['1', '.', 'UTR3', '31', '40', '.', '-', '.', 'biotype "lncRNA";gene_id "id1";'],
    ])
    expected = [
        ['1', '.', 'UTR3', '1', '20', '.', '+', '.', 'biotype "lncRNA";gene_id "id1";'],
        ['1', '.', 'UTR3', '21', '30', '.', '+', '.', 'biotype "lncRNA";gene_id "id2";'],
        ['1', '.', 'UTR3', '31', '40', '.', '+', '.', 'biotype "lncRNA";gene_id "id1";'],
        ['1', '.', 'UTR3', '31', '40', '.', '-', '.', 'biotype "lncRNA";gene_id "id1";'],
    ]

    region.merge_regions(nonmerged, self.tmp)
    results = make_list_from_file(self.tmp, fields_separator='\t')

    # zip() stops at the shorter input, so without this check an empty or
    # truncated output would pass the row-by-row comparison below.
    self.assertEqual(len(results), len(expected))

    # Since order of attrs can be arbitrary, equality checks are more complex:
    for res, exp in zip(results, expected):
        self.assertEqual(res[:8], exp[:8])
        self.assertEqual(
            ';'.join(sorted(res[8].split(';'))),
            ';'.join(sorted(exp[8].split(';'))),
        )
def test_basic(self):
    """Check the landmarks ``landmark.make_landmarks`` writes for a CDS-intron-CDS gene."""
    region_rows = [
        ['chr1', '.', 'CDS', '150', '200', '.', '+', '.', 'gene_name "A";'],
        ['chr1', '.', 'intron', '201', '400', '.', '+', '.', 'gene_name "A";'],
        ['chr1', '.', 'CDS', '401', '600', '.', '+', '.', 'gene_name "A";'],
    ]
    regions = make_file_from_list(region_rows)
    landmarks = get_temp_file_name(extension='bed')

    landmark.make_landmarks(regions, landmarks)

    expected = [
        ['chr1', '200', '201', 'exon-intron;A', '.', '+'],
        ['chr1', '400', '401', 'intron-exon;A', '.', '+'],
    ]
    self.assertEqual(make_list_from_file(landmarks), expected)
def test_fastq_file_write(self):
    """Write two entries through ``FastqFile`` and read the file back line by line."""
    entries = [
        ['@header1', 'AAA', '+', 'FFF'],
        ['@header2', 'AAAA', '+', 'FFFF'],
    ]
    fq_file_name = get_temp_file_name(extension='fq.gz')

    writer = iCount.files.fastq.FastqFile(fq_file_name, 'wt')
    for fields in entries:
        writer.write(iCount.files.fastq.FastqEntry(*fields))
    writer.close()

    result = make_list_from_file(fq_file_name)
    # Every field of every entry is expected as a single-item row.
    expected = [[field] for fields in entries for field in fields]
    self.assertEqual(result, expected)
def test_bed2bedgraph_params(self):
    """
    Convert with custom ``name`` and ``description`` parameters.

    The ``name`` given here is longer than 15 characters, so the track line
    is expected to contain it trimmed to 15 characters.
    """
    iCount.files.bedgraph.bed2bedgraph(
        self.bed,
        self.bedgraph,
        name='Longer than 15 chars.',
        description='Custom text.',
    )

    track_line = 'track type=bedGraph name="Longer than 15 " description="Custom text."'
    data_rows = [
        ['1', '4', '5', '+5'],
        ['1', '5', '6', '+1'],
        ['1', '5', '6', '-1'],
        ['2', '5', '6', '+3'],
    ]
    expected = [[track_line]] + data_rows

    result = make_list_from_file(self.bedgraph, fields_separator='\t')
    self.assertEqual(result, expected)