def test_18_filter_reads(self):
    """
    Check the read-filtering pipeline on the 'map' and 'sam' alignments.

    For each alignment format the paired read files are parsed, intersected
    and passed through ``filter_reads``; the number of reads reported under
    each filter key is then compared with reference values. Afterwards
    filter 1 is applied in reverse to the 'map' intersection and the
    iterative-mapping summary is checked.

    The test is skipped when ``ONLY`` is set to something other than '18'.
    When ``CHKTIME`` is true the elapsed time is printed.
    """
    if ONLY and ONLY != '18':
        return
    if CHKTIME:
        t0 = time()
    # loop-invariant import, kept local to the test as in the original
    from pytadbit.mapping import get_intersection
    for ali in ['map', 'sam']:
        seed(1)
        # The exact reference counts below only hold when the first draw
        # after seed(1) matches the reference value; otherwise reuse the
        # existing FASTA file and fall back to the looser checks.
        if 13436 == int(random()*100000):
            same_seed = True
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta('test.fa~', verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            same_seed = False
            genome = parse_fasta('test.fa~')
        # PARSE SAM
        if ali == 'map':
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-argument form prints the same text on Python 2 and 3
                print('ERROR: PYSAM not found, skipping test\n')
                continue
        parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
               './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
               re_name='DPNII', mapper='GEM')
        # GET INTERSECTION
        get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                         'lala-%s~' % (ali))
        # FILTER
        masked = filter_reads('lala-%s~' % (ali), verbose=False,
                              fast=(ali == 'map'))
        for key in (1, 2, 3, 4):
            self.assertEqual(masked[key]['reads'], 1000)
        if same_seed:
            self.assertEqual(masked[5]['reads'], 1110)
            self.assertEqual(masked[6]['reads'], 2332)
            self.assertEqual(masked[7]['reads'], 0)
            self.assertEqual(masked[8]['reads'], 141)
            self.assertEqual(masked[10]['reads'], 1)
        else:
            self.assertTrue(masked[5]['reads'] > 1000)
        self.assertEqual(masked[9]['reads'], 1000)
    # NOTE(review): the source was collapsed onto one line; these checks are
    # assumed to run once after the loop (they hard-code the 'map' files) --
    # confirm against the original layout.
    apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                 reverse=True, verbose=False)
    # count non-comment lines, closing the file deterministically
    with open('lala-map-filt~') as handle:
        kept = sum(1 for line in handle if not line.startswith('#'))
    self.assertEqual(kept, 1000)
    d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same output as the Python 2 statement ``print '18', time() - t0``
        print('18 %s' % (time() - t0))
def test_18_filter_reads(self):
    """
    Check the read-filtering pipeline on the "map" and "sam" alignments.

    For each alignment format the paired read files are parsed, intersected
    and passed through ``filter_reads``; the number of reads reported under
    each filter key is then compared with reference values. Afterwards
    filter 1 is applied in reverse to the "map" intersection and the
    iterative-mapping summary is checked.

    The test is skipped when ``ONLY`` is set and does not contain "18".
    When ``CHKTIME`` is true the elapsed time is printed.
    """
    if ONLY and "18" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    # loop-invariant import, kept local to the test as in the original
    from pytadbit.mapping import get_intersection
    for ali in ["map", "sam"]:
        seed(1)
        # The exact reference counts below only hold when the first draw
        # after seed(1) matches the reference value; otherwise reuse the
        # existing FASTA file and fall back to the looser checks.
        if 13436 == int(random()*100000):
            same_seed = True
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta("test.fa~", verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            same_seed = False
            genome = parse_fasta("test.fa~")
        # PARSE SAM
        if ali == "map":
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-argument form prints the same text on Python 2 and 3
                print("ERROR: PYSAM not found, skipping test\n")
                continue
        parser(["test_read1.%s~" % (ali)], ["test_read2.%s~" % (ali)],
               "./lala1-%s~" % (ali), "./lala2-%s~" % (ali), genome,
               re_name="DPNII", mapper="GEM")
        # GET INTERSECTION
        get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali),
                         "lala-%s~" % (ali))
        # FILTER
        masked = filter_reads("lala-%s~" % (ali), verbose=False,
                              fast=(ali == "map"))
        for key in (1, 2, 3, 4):
            self.assertEqual(masked[key]["reads"], 1000)
        if same_seed:
            self.assertEqual(masked[5]["reads"], 1110)
            self.assertEqual(masked[6]["reads"], 2332)
            self.assertEqual(masked[7]["reads"], 0)
            self.assertEqual(masked[8]["reads"], 141)
            self.assertEqual(masked[10]["reads"], 1)
        else:
            self.assertTrue(masked[5]["reads"] > 1000)
        self.assertEqual(masked[9]["reads"], 1000)
    # NOTE(review): the source was collapsed onto one line; these checks are
    # assumed to run once after the loop (they hard-code the "map" files) --
    # confirm against the original layout.
    apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1],
                 reverse=True, verbose=False)
    # count non-comment lines, closing the file deterministically
    with open("lala-map-filt~") as handle:
        kept = sum(1 for line in handle if not line.startswith("#"))
    self.assertEqual(kept, 1000)
    d = plot_iterative_mapping("lala1-map~", "lala2-map~")
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same output as the Python 2 statement ``print "18", time() - t0``
        print("18 %s" % (time() - t0))
def test_18_filter_reads(self):
    """
    Check the read-filtering pipeline on the "map" and "sam" alignments.

    For each alignment format the paired read files are parsed, intersected
    and passed through ``filter_reads``; the number of reads reported under
    each filter key is then compared with reference values. Afterwards
    filter 1 is applied in reverse to the "map" intersection and the
    iterative-mapping summary is checked.

    The test is skipped when ``ONLY`` is set to something other than "18".
    When ``CHKTIME`` is true the elapsed time is printed.
    """
    if ONLY and ONLY != "18":
        return
    if CHKTIME:
        t0 = time()
    # loop-invariant import, kept local to the test as in the original
    from pytadbit.mapping import get_intersection
    for ali in ["map", "sam"]:
        seed(1)
        # The exact reference counts below only hold when the first draw
        # after seed(1) matches the reference value; otherwise reuse the
        # existing FASTA file and fall back to the looser checks.
        if 13436 == int(random() * 100000):
            same_seed = True
            genome = generate_random_ali(ali)
            genome_bis = parse_fasta("test.fa~", verbose=False)
            self.assertEqual(genome, genome_bis)
        else:
            same_seed = False
            genome = parse_fasta("test.fa~")
        # PARSE SAM
        if ali == "map":
            from pytadbit.parsers.map_parser import parse_map as parser
        else:
            try:
                from pytadbit.parsers.sam_parser import parse_sam as parser
            except ImportError:
                # single-argument form prints the same text on Python 2 and 3
                print("ERROR: PYSAM not found, skipping test\n")
                continue
        parser(
            ["test_read1.%s~" % (ali)],
            ["test_read2.%s~" % (ali)],
            "./lala1-%s~" % (ali),
            "./lala2-%s~" % (ali),
            genome,
            re_name="DPNII",
            mapper="GEM",
        )
        # GET INTERSECTION
        get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali), "lala-%s~" % (ali))
        # FILTER
        masked = filter_reads("lala-%s~" % (ali), verbose=False, fast=(ali == "map"))
        for key in (1, 2, 3, 4):
            self.assertEqual(masked[key]["reads"], 1000)
        if same_seed:
            self.assertEqual(masked[5]["reads"], 1110)
            self.assertEqual(masked[6]["reads"], 2332)
            self.assertEqual(masked[7]["reads"], 0)
            self.assertEqual(masked[8]["reads"], 141)
            self.assertEqual(masked[10]["reads"], 1)
        else:
            self.assertTrue(masked[5]["reads"] > 1000)
        self.assertEqual(masked[9]["reads"], 1000)
    # NOTE(review): the source was collapsed onto one line; these checks are
    # assumed to run once after the loop (they hard-code the "map" files) --
    # confirm against the original layout.
    apply_filter(
        "lala-map~", "lala-map-filt~", masked, filters=[1], reverse=True, verbose=False
    )
    # count non-comment lines, closing the file deterministically
    with open("lala-map-filt~") as handle:
        kept = sum(1 for line in handle if not line.startswith("#"))
    self.assertEqual(kept, 1000)
    d = plot_iterative_mapping("lala1-map~", "lala2-map~")
    self.assertEqual(d[0][1], 6000)
    if CHKTIME:
        self.assertEqual(True, True)
        # same output as the Python 2 statement ``print "18", time() - t0``
        print("18 %s" % (time() - t0))
plt.rcParams['font.weight'] = 'medium' #plt.rcParams['font.family'] = 'sans-serif' #plt.rcParams['font.sans-serif'] = 'Arial' plt.rcParams['lines.linewidth'] = 2.0 plt.rcParams['legend.numpoints'] = 1 plt.rcParams['legend.frameon'] = False plt.rcParams['savefig.bbox'] = 'tight' # Plot: fraction of mapped reads infiles = [maps1, maps2] pair_id = infiles[0].split("/")[-1].replace("_read1_map.tsv", "") outfile = '%s/%s_plot_proportion_mapped_reads.png' % (POSTMAPPING_PLOTS, pair_id) reads_mapped_per_iteration = plot_iterative_mapping( fnam1=infiles[0], fnam2=infiles[1], total_reads=n_reads_trimmed, savefig=outfile) reads_mapped_per_iteration = pd.DataFrame.from_dict(reads_mapped_per_iteration) reads_mapped_per_iteration.columns = ['read1', 'read2'] fraction_mapped_read1 = list( reads_mapped_per_iteration['read1'])[-1] / float(n_reads_trimmed) fraction_mapped_read2 = list( reads_mapped_per_iteration['read2'])[-1] / float(n_reads_trimmed) fraction_mapped_str = ",".join( [str(i) for i in [fraction_mapped_read1, fraction_mapped_read2]]) # Plot: distribution of dangling-end lengths plt.rcParams['font.size'] = 12 infile = '%s/%s_both_map.tsv' % (PROCESSED, pair_id) outfile = '%s/%s_plot_distribution_dangling_ends_lengths.png' % (