Exemple #1
0
    def test_18_filter_reads(self):
        """
        Run parsing, intersection and filtering on the 'map' and 'sam' test
        alignments and check the read count reported for each filter; then
        apply one filter to the 'map' result and check the summary returned
        by plot_iterative_mapping.

        Returns immediately when ONLY is set to anything other than '18'.
        The 'sam' round is skipped (with a message) when its parser cannot
        be imported.
        """
        if ONLY and ONLY != '18':
            return
        if CHKTIME:
            t0 = time()
        for ali in ['map', 'sam']:
            seed(1)
            # The exact counts asserted further down are only checked when
            # the random generator yields this known first value for seed 1;
            # otherwise the existing FASTA file is reused and only the
            # looser checks are made.
            if 13436 == int(random()*100000):
                same_seed = True
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta('test.fa~', verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                same_seed = False
                genome = parse_fasta('test.fa~')
            # Pick the parser matching the alignment format
            if ali == 'map':
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # single parenthesised string: prints the same text
                    # under Python 2 and Python 3
                    print('ERROR: PYSAM not found, skipping test\n')
                    continue

            parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
                   './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
                   re_name='DPNII', mapper='GEM')

            # GET INTERSECTION
            from pytadbit.mapping import get_intersection
            get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                             'lala-%s~' % (ali))
            # FILTER
            masked = filter_reads('lala-%s~' % (ali), verbose=False,
                                  fast=(ali=='map'))
            self.assertEqual(masked[1]['reads'], 1000)
            self.assertEqual(masked[2]['reads'], 1000)
            self.assertEqual(masked[3]['reads'], 1000)
            self.assertEqual(masked[4]['reads'], 1000)
            if same_seed:
                self.assertEqual(masked[5]['reads'], 1110)
                self.assertEqual(masked[6]['reads'], 2332)
                self.assertEqual(masked[7]['reads'], 0)
                self.assertEqual(masked[8]['reads'], 141)
                self.assertEqual(masked[10]['reads'], 1)
            else:
                self.assertGreater(masked[5]['reads'], 1000)
            self.assertEqual(masked[9]['reads'], 1000)
        # NOTE(review): `masked` is whatever the last completed round left
        # behind (the 'sam' one unless it was skipped), yet it is applied to
        # the 'map' file here -- confirm this is intended.
        apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                     reverse=True, verbose=False)
        # Count the non-comment lines; the context manager makes sure the
        # file handle is closed again.
        with open('lala-map-filt~') as filtered:
            kept = sum(1 for line in filtered if not line.startswith('#'))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print('18 %s' % (time() - t0))
Exemple #2
0
    def test_18_filter_reads(self):
        """
        Exercise read parsing, intersection and filtering for the "map" and
        "sam" test alignments, asserting the read count of every filter,
        then apply a single filter and check the iterative-mapping summary.

        Skipped entirely when ONLY is set and does not contain "18". The
        "sam" round is skipped (with a message) when its parser cannot be
        imported.
        """
        if ONLY and "18" not in ONLY:
            return
        if CHKTIME:
            t0 = time()
        for ali in ["map", "sam"]:
            seed(1)
            # Exact per-filter counts are only asserted when the random
            # generator gives this known first value for seed 1; otherwise
            # the FASTA file already on disk is parsed instead.
            if 13436 == int(random()*100000):
                same_seed = True
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta("test.fa~", verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                same_seed = False
                genome = parse_fasta("test.fa~")
            # Choose the parser that matches the alignment format
            if ali == "map":
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # one parenthesised string prints identically on
                    # Python 2 and Python 3
                    print("ERROR: PYSAM not found, skipping test\n")
                    continue

            parser(["test_read1.%s~" % (ali)], ["test_read2.%s~" % (ali)],
                   "./lala1-%s~" % (ali), "./lala2-%s~" % (ali), genome,
                   re_name="DPNII", mapper="GEM")

            # GET INTERSECTION
            from pytadbit.mapping import get_intersection
            get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali),
                             "lala-%s~" % (ali))
            # FILTER
            masked = filter_reads("lala-%s~" % (ali), verbose=False,
                                  fast=(ali=="map"))
            self.assertEqual(masked[1]["reads"], 1000)
            self.assertEqual(masked[2]["reads"], 1000)
            self.assertEqual(masked[3]["reads"], 1000)
            self.assertEqual(masked[4]["reads"], 1000)
            if same_seed:
                self.assertEqual(masked[5]["reads"], 1110)
                self.assertEqual(masked[6]["reads"], 2332)
                self.assertEqual(masked[7]["reads"], 0)
                self.assertEqual(masked[8]["reads"], 141)
                self.assertEqual(masked[10]["reads"], 1)
            else:
                self.assertGreater(masked[5]["reads"], 1000)
            self.assertEqual(masked[9]["reads"], 1000)
        # NOTE(review): `masked` comes from the last round that completed
        # (the "sam" one unless it was skipped) but is applied to the "map"
        # file below -- confirm this is intended.
        apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1],
                     reverse=True, verbose=False)
        # Count non-comment lines and close the file handle afterwards.
        with open("lala-map-filt~") as filtered:
            kept = sum(1 for line in filtered if not line.startswith("#"))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping("lala1-map~", "lala2-map~")
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print("18 %s" % (time() - t0))
Exemple #3
0
    def test_18_filter_reads(self):
        """
        Check filter_reads on the 'map' and 'sam' test alignments.

        For each format the reads are parsed, the two sides are intersected
        and the per-filter read counts are asserted. Afterwards filter 1 is
        applied (reversed) to the 'map' result and the output of
        plot_iterative_mapping is checked.

        Returns immediately when ONLY is set to anything other than '18';
        the 'sam' round is skipped with a message when its parser cannot be
        imported.
        """
        if ONLY and ONLY != '18':
            return
        if CHKTIME:
            t0 = time()
        for ali in ['map', 'sam']:
            seed(1)
            # Only when seed 1 yields this known first value are the exact
            # counts below asserted; otherwise the FASTA file on disk is
            # reused and a looser check is made.
            if 13436 == int(random()*100000):
                same_seed = True
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta('test.fa~', verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                same_seed = False
                genome = parse_fasta('test.fa~')
            # Parser depends on the alignment format
            if ali == 'map':
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # parenthesised single string: same output on
                    # Python 2 and Python 3
                    print('ERROR: PYSAM not found, skipping test\n')
                    continue

            parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
                   './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
                   re_name='DPNII', mapper='GEM')

            # GET INTERSECTION
            from pytadbit.mapping import get_intersection
            get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                             'lala-%s~' % (ali))
            # FILTER
            masked = filter_reads('lala-%s~' % (ali), verbose=False,
                                  fast=(ali=='map'))
            self.assertEqual(masked[1]['reads'], 1000)
            self.assertEqual(masked[2]['reads'], 1000)
            self.assertEqual(masked[3]['reads'], 1000)
            self.assertEqual(masked[4]['reads'], 1000)
            if same_seed:
                self.assertEqual(masked[5]['reads'], 1110)
                self.assertEqual(masked[6]['reads'], 2332)
                self.assertEqual(masked[7]['reads'], 0)
                self.assertEqual(masked[8]['reads'], 141)
                self.assertEqual(masked[10]['reads'], 1)
            else:
                self.assertGreater(masked[5]['reads'], 1000)
            self.assertEqual(masked[9]['reads'], 1000)
        # NOTE(review): `masked` is the value from the last completed round
        # (the 'sam' one unless it was skipped), applied here to the 'map'
        # file -- confirm this is intended.
        apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                     reverse=True, verbose=False)
        # The with-block closes the file once the non-comment lines have
        # been counted.
        with open('lala-map-filt~') as filtered:
            kept = sum(1 for line in filtered if not line.startswith('#'))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print('18 %s' % (time() - t0))
Exemple #4
0
        }
        out1.write(read.format(**read1))
        out2.write(read.format(**read2))
    i += 1

# Close both output handles.
# NOTE(review): presumably these are the test_read1/test_read2 files parsed
# below -- confirm against the code that opens them.
out1.close()
out2.close()

# Select the parser for the alignment format: parse_map for 'map',
# parse_sam for anything else.
if ali == 'map':
    from pytadbit.parsers.map_parser import parse_map as parser
else:
    from pytadbit.parsers.sam_parser import parse_sam as parser

# Parse the two read files into lala1-<ali>~ and lala2-<ali>~.
parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
       './lala1-%s~' % (ali),
       './lala2-%s~' % (ali),
       genome,
       re_name='DPNII',
       mapper='GEM')

# Intersect the two parsed files into lala-<ali>~.
from pytadbit.mapping.mapper import get_intersection

get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali), 'lala-%s~' % (ali))

# Run the read filters on the intersection; the result is kept in `masked`.
from pytadbit.mapping.filter import filter_reads

masked = filter_reads('lala-%s~' % (ali))
    else:
        read2 = {'crm': crm1, 'pos': pos1, 'flag': flags[sd1], 'id': 'lala05.1%011d' % (i)}
        read1 = {'crm': crm2, 'pos': pos2, 'flag': flags[sd2], 'id': 'lala05.1%011d' % (i)}
        out1.write(read.format(**read1))
        out2.write(read.format(**read2))
    i += 1

# Close both output handles.
# NOTE(review): presumably these are the test_read1/test_read2 files parsed
# below -- confirm against the code that opens them.
out1.close()
out2.close()

# Select the parser for the alignment format: parse_map for 'map',
# parse_sam for anything else.
if ali == 'map':
    from pytadbit.parsers.map_parser import parse_map as parser
else:
    from pytadbit.parsers.sam_parser import parse_sam as parser

# Parse the two read files into lala1-<ali>~ and lala2-<ali>~.
parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
       './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
       re_name='DPNII', mapper='GEM')

# Intersect the two parsed files into lala-<ali>~.
from pytadbit.mapping.mapper import get_intersection

get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali), 'lala-%s~' % (ali))

# Run the read filters on the intersection; the result is kept in `masked`.
from pytadbit.mapping.filter import filter_reads

masked = filter_reads('lala-%s~' % (ali))

Exemple #6
0
    def test_18_filter_reads(self):
        """
        Run parsing, intersection and read filtering for the "map" and "sam"
        test alignments and assert the read count of each filter; then apply
        filter 1 (reversed) to the "map" result and check the value returned
        by plot_iterative_mapping.

        Returns immediately when ONLY is set to anything other than "18".
        The "sam" round is skipped (with a message) when its parser cannot
        be imported.
        """
        if ONLY and ONLY != "18":
            return
        if CHKTIME:
            t0 = time()
        for ali in ["map", "sam"]:
            seed(1)
            # The exact counts further down are asserted only when seed 1
            # produces this known first random value; otherwise the FASTA
            # file already on disk is parsed and a looser check is used.
            if 13436 == int(random() * 100000):
                same_seed = True
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta("test.fa~", verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                same_seed = False
                genome = parse_fasta("test.fa~")
            # Pick the parser matching the alignment format
            if ali == "map":
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # a single parenthesised string prints the same text
                    # on Python 2 and Python 3
                    print("ERROR: PYSAM not found, skipping test\n")
                    continue

            parser(
                ["test_read1.%s~" % (ali)],
                ["test_read2.%s~" % (ali)],
                "./lala1-%s~" % (ali),
                "./lala2-%s~" % (ali),
                genome,
                re_name="DPNII",
                mapper="GEM",
            )

            # GET INTERSECTION
            from pytadbit.mapping import get_intersection

            get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali), "lala-%s~" % (ali))
            # FILTER
            masked = filter_reads("lala-%s~" % (ali), verbose=False, fast=(ali == "map"))
            self.assertEqual(masked[1]["reads"], 1000)
            self.assertEqual(masked[2]["reads"], 1000)
            self.assertEqual(masked[3]["reads"], 1000)
            self.assertEqual(masked[4]["reads"], 1000)
            if same_seed:
                self.assertEqual(masked[5]["reads"], 1110)
                self.assertEqual(masked[6]["reads"], 2332)
                self.assertEqual(masked[7]["reads"], 0)
                self.assertEqual(masked[8]["reads"], 141)
                self.assertEqual(masked[10]["reads"], 1)
            else:
                self.assertGreater(masked[5]["reads"], 1000)
            self.assertEqual(masked[9]["reads"], 1000)
        # NOTE(review): `masked` is left over from the last completed round
        # (the "sam" one unless it was skipped) and is applied to the "map"
        # file here -- confirm this is intended.
        apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1], reverse=True, verbose=False)
        # Count non-comment lines; the with-block closes the file handle.
        with open("lala-map-filt~") as filtered:
            kept = sum(1 for line in filtered if not line.startswith("#"))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping("lala1-map~", "lala2-map~")
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print("18 %s" % (time() - t0))