def test_18_filter_reads(self):
    """
    Check read filtering on paired-end reads in 'map' and 'sam' formats.

    For each format the two read files are parsed, intersected and passed
    to filter_reads; the number of reads reported under each filter key is
    compared with the expected counts. Exact counts for keys 5-8 and 10
    are only asserted when the random generator yields the reference
    number right after seed(1). Afterwards apply_filter is run on the
    'map' intersection (filters=[1], reverse=True) and the first entry
    returned by plot_iterative_mapping is checked.
    """
    if ONLY and ONLY != '18':
        return
    if CHKTIME:
        t0 = time()
    for ali in ['map', 'sam']:
        seed(1)
        # exact counts further down are only asserted when the generator
        # gives the reference number for this seed
        if 13436 == int(random()*100000):
            same_seed = True
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta('test.fa~', verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            same_seed = False
            genome = parse_fasta('test.fa~')
        # PARSE SAM
        if ali == 'map':
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-string form prints the same under Python 2 and 3
                print('ERROR: PYSAM not found, skipping test\n')
                continue
        parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
               './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
               re_name='DPNII', mapper='GEM')
        # GET INTERSECTION
        from pytadbit.mapping import get_intersection
        get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                         'lala-%s~' % (ali))
        # FILTER
        masked = filter_reads('lala-%s~' % (ali), verbose=False,
                              fast=(ali == 'map'))
        self.assertEqual(masked[1]['reads'], 1000)
        self.assertEqual(masked[2]['reads'], 1000)
        self.assertEqual(masked[3]['reads'], 1000)
        self.assertEqual(masked[4]['reads'], 1000)
        if same_seed:
            self.assertEqual(masked[5]['reads'], 1110)
            self.assertEqual(masked[6]['reads'], 2332)
            self.assertEqual(masked[7]['reads'], 0)
            self.assertEqual(masked[8]['reads'], 141)
            self.assertEqual(masked[10]['reads'], 1)
        else:
            self.assertTrue(masked[5]['reads'] > 1000)
        self.assertEqual(masked[9]['reads'], 1000)
    # NOTE(review): file names here are hard-coded to the 'map' outputs while
    # `masked` comes from the last format processed -- confirm this is intended
    apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                 reverse=True, verbose=False)
    # count non-header lines; the context manager closes the file, which the
    # former bare open() inside a list comprehension never did
    with open('lala-map-filt~') as filtered:
        kept = sum(1 for line in filtered if not line.startswith('#'))
    self.assertEqual(kept, 1000)
    d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same text as "print '18', elapsed", written to parse on Python 3 too
        print('18 %s' % (time() - t0))
def test_18_filter_reads(self):
    """
    Run the parse / intersect / filter chain on 'map' and 'sam' reads.

    The test is skipped unless "18" is selected through ONLY (or ONLY is
    empty). For each alignment format the read files are parsed against
    the test genome, intersected, and given to filter_reads, whose
    per-key 'reads' counts are compared with expected values. Counts for
    keys 5-8 and 10 are only pinned when the random generator reproduces
    the reference number after seed(1). The 'map' intersection is then
    filtered with apply_filter and the output of plot_iterative_mapping
    is checked.
    """
    if ONLY and "18" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    for ali in ["map", "sam"]:
        seed(1)
        same_seed = 13436 == int(random()*100000)
        if same_seed:
            # generator matches the reference: build the alignment and make
            # sure the FASTA on disk holds the same genome
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta("test.fa~", verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            genome = parse_fasta("test.fa~")
        # PARSE SAM
        if ali == "map":
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-string form prints the same under Python 2 and 3
                print("ERROR: PYSAM not found, skipping test\n")
                continue
        reads1 = "./lala1-%s~" % (ali)
        reads2 = "./lala2-%s~" % (ali)
        parser(["test_read1.%s~" % (ali)], ["test_read2.%s~" % (ali)],
               reads1, reads2, genome, re_name="DPNII", mapper="GEM")
        # GET INTERSECTION
        from pytadbit.mapping import get_intersection
        get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali),
                         "lala-%s~" % (ali))
        # FILTER
        masked = filter_reads("lala-%s~" % (ali), verbose=False,
                              fast=(ali == "map"))
        for key in (1, 2, 3, 4):
            self.assertEqual(masked[key]["reads"], 1000)
        if same_seed:
            self.assertEqual(masked[5]["reads"], 1110)
            self.assertEqual(masked[6]["reads"], 2332)
            self.assertEqual(masked[7]["reads"], 0)
            self.assertEqual(masked[8]["reads"], 141)
            self.assertEqual(masked[10]["reads"], 1)
        else:
            self.assertTrue(masked[5]["reads"] > 1000)
        self.assertEqual(masked[9]["reads"], 1000)
    # NOTE(review): file names here are hard-coded to the 'map' outputs while
    # `masked` comes from the last format processed -- confirm this is intended
    apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1],
                 reverse=True, verbose=False)
    # the file is now closed deterministically instead of being left to the
    # garbage collector
    with open("lala-map-filt~") as handle:
        data_lines = [True for line in handle if not line.startswith("#")]
    self.assertEqual(len(data_lines), 1000)
    d = plot_iterative_mapping("lala1-map~", "lala2-map~")
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same text as 'print "18", elapsed', written to parse on Python 3 too
        print("18 %s" % (time() - t0))
} out1.write(read.format(**read1)) out2.write(read.format(**read2)) i += 1 out1.close() out2.close() # PARSE SAM if ali == 'map': from pytadbit.parsers.map_parser import parse_map as parser else: from pytadbit.parsers.sam_parser import parse_sam as parser parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)], './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome, re_name='DPNII', mapper='GEM') # GET INTERSECTION from pytadbit.mapping.mapper import get_intersection get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali), 'lala-%s~' % (ali)) # FILTER from pytadbit.mapping.filter import filter_reads masked = filter_reads('lala-%s~' % (ali))
else: read2 = {'crm': crm1, 'pos': pos1, 'flag': flags[sd1], 'id': 'lala05.1%011d' % (i)} read1 = {'crm': crm2, 'pos': pos2, 'flag': flags[sd2], 'id': 'lala05.1%011d' % (i)} out1.write(read.format(**read1)) out2.write(read.format(**read2)) i += 1 out1.close() out2.close() # PARSE SAM if ali == 'map': from pytadbit.parsers.map_parser import parse_map as parser else: from pytadbit.parsers.sam_parser import parse_sam as parser parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)], './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome, re_name='DPNII', mapper='GEM') # GET INTERSECTION from pytadbit.mapping.mapper import get_intersection get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali), 'lala-%s~' % (ali)) # FILTER from pytadbit.mapping.filter import filter_reads masked = filter_reads('lala-%s~' % (ali))
def test_18_filter_reads(self):
    """
    Verify filter_reads / apply_filter on 'map' and 'sam' alignments.

    Each alignment format goes through its parser, get_intersection and
    filter_reads; the 'reads' count stored under every filter key is then
    checked. Keys 1-4 and 9 must always hold 1000 reads, whereas the exact
    values for keys 5-8 and 10 are only asserted when the random
    generator returns the reference number after seed(1). Once both
    formats are done, apply_filter is run on the 'map' intersection and
    the result of plot_iterative_mapping is checked.
    """
    if ONLY and ONLY != "18":
        return
    if CHKTIME:
        t0 = time()
    # counts that are only reproducible with the reference random stream
    seeded_counts = ((5, 1110), (6, 2332), (7, 0), (8, 141), (10, 1))
    for ali in ["map", "sam"]:
        seed(1)
        if 13436 == int(random() * 100000):
            same_seed = True
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta("test.fa~", verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            same_seed = False
            genome = parse_fasta("test.fa~")
        # PARSE SAM
        if ali == "map":
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-string form prints the same under Python 2 and 3
                print("ERROR: PYSAM not found, skipping test\n")
                continue
        parser(
            ["test_read1.%s~" % (ali)],
            ["test_read2.%s~" % (ali)],
            "./lala1-%s~" % (ali),
            "./lala2-%s~" % (ali),
            genome,
            re_name="DPNII",
            mapper="GEM",
        )
        # GET INTERSECTION
        from pytadbit.mapping import get_intersection

        get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali), "lala-%s~" % (ali))
        # FILTER
        masked = filter_reads("lala-%s~" % (ali), verbose=False, fast=(ali == "map"))
        self.assertEqual(masked[1]["reads"], 1000)
        self.assertEqual(masked[2]["reads"], 1000)
        self.assertEqual(masked[3]["reads"], 1000)
        self.assertEqual(masked[4]["reads"], 1000)
        if same_seed:
            for key, expected in seeded_counts:
                self.assertEqual(masked[key]["reads"], expected)
        else:
            self.assertTrue(masked[5]["reads"] > 1000)
        self.assertEqual(masked[9]["reads"], 1000)
    # NOTE(review): file names here are hard-coded to the 'map' outputs while
    # `masked` comes from the last format processed -- confirm this is intended
    apply_filter(
        "lala-map~", "lala-map-filt~", masked, filters=[1], reverse=True, verbose=False
    )
    # read the filtered file inside a context manager so the handle is
    # released as soon as the lines have been counted
    with open("lala-map-filt~") as filtered:
        n_kept = len([True for line in filtered if not line.startswith("#")])
    self.assertEqual(n_kept, 1000)
    d = plot_iterative_mapping("lala1-map~", "lala2-map~")
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same text as 'print "18", elapsed', written to parse on Python 3 too
        print("18 %s" % (time() - t0))