Example #1
0
    def runTest(self):
        """Check the first entry yielded through a quality frequency filter.

        A ``SampleFilter`` carrying a frequency filter on ``"__qual__"`` with
        the value 0.1 is applied on top of the parser. The first entry that
        comes out is expected at coordinate 1 of chr1 with two alleles.
        """
        track_parser = naivetrack.TrackParser(
            chromosome_list=self.chromosome_list)
        track_parser.register_handle(self.file)
        track_parser.set_maximum_chromosome_size(500000)

        quality_filter = naivetrack.SampleFilter()
        quality_filter.add_frequency_filter("__qual__", 0.1)
        filtered = naivetrack.apply_filters(track_parser,
                                            filters=[quality_filter])

        first = filtered.next()

        # location of the entry and state of the parser buffer
        self.assertEqual(first.position.coordinate, 1)
        self.assertEqual(first.position.sequence, 'T')
        self.assertEqual(first.position.get_chromosome(), 'chr1')
        self.assertEqual(len(track_parser.buffer), 1)
        self.assertEqual(len(first.alleles), 2)

        # expected alleles, in order: (sequence, depth, quality)
        expected_alleles = [('G', 1, 40), ('T', 1, 55)]
        for index, (sequence, depth, quality) in enumerate(expected_alleles):
            allele = first.alleles[index]
            self.assertEqual(allele.sequence, sequence)
            self.assertEqual(allele.depth, depth)
            self.assertEqual(allele.quality, quality)

        # statistics over the whole entry
        totals = first.statistics
        self.assertEqual(totals.number_of_alleles, 2)
        self.assertEqual(totals.depth, 2)
        self.assertEqual(totals.quality, 95)
Example #2
0
    def runTest(self):
        """Walk a single-file track and check every entry up to end of file.

        Each entry is checked for its coordinate, reference sequence,
        chromosome, the size of the parser buffer right after it was read,
        and for carrying exactly one allele.
        """
        parser = naivetrack.TrackParser(chromosome_list=self.chromosome_list)
        parser.register_handle(self.file)
        parser.set_maximum_chromosome_size(500000)

        # (coordinate, sequence, chromosome, parser buffer size)
        expected_entries = [
            (1, 'T', 'chr1', 1),
            (20000, 'T', 'chr1', 2),
            (20001, 'T', 'chr1', 2),
            (20001, 'G', 'chr2', 1),
        ]
        for coordinate, sequence, chromosome, buffered in expected_entries:
            current = parser.next()
            self.assertEqual(current.position.coordinate, coordinate)
            self.assertEqual(current.position.sequence, sequence)
            self.assertEqual(current.position.get_chromosome(), chromosome)
            self.assertEqual(len(parser.buffer), buffered)
            self.assertEqual(len(current.alleles), 1)

        # the parser must be exhausted after the last expected entry
        self.assertRaises(StopIteration, parser.next)
Example #3
0
    def runTest(self):
        """Walk two registered track files and check the merged entries.

        Seven entries are expected before the parser raises
        ``StopIteration``; the third one reports two alleles and the last
        two are located on chr2 and chr3.
        """
        merged = naivetrack.TrackParser(chromosome_list=self.chromosome_list)
        for handle in (self.file_a, self.file_b):
            merged.register_handle(handle)
        merged.set_maximum_chromosome_size(500000)

        # positions one to three: (coordinate, number of alleles, buffer size)
        # a buffer size of None means the buffer is not checked for that entry;
        # the third position is the one that merges 2 entries
        leading = [(1, 1, 2), (3, 1, None), (20000, 2, 2)]
        for coordinate, n_alleles, buffered in leading:
            current = merged.next()
            self.assertEqual(current.position.coordinate, coordinate)
            self.assertEqual(current.statistics.number_of_alleles, n_alleles)
            self.assertEqual(current.position.sequence, 'T')
            if buffered is not None:
                self.assertEqual(len(merged.buffer), buffered)

        # fourth and fifth position: (coordinate, buffer size)
        for coordinate, buffered in [(20001, 2), (200000, 1)]:
            current = merged.next()
            self.assertEqual(current.position.coordinate, coordinate)
            self.assertEqual(current.position.sequence, 'T')
            self.assertEqual(len(merged.buffer), buffered)

        # sixth and seventh position: next chromosome tests
        for chromosome in ('chr2', 'chr3'):
            current = merged.next()
            self.assertEqual(current.position.coordinate, 20001)
            self.assertEqual(current.position.sequence, 'G')
            self.assertEqual(current.position.get_chromosome(), chromosome)
            self.assertEqual(len(merged.buffer), 1)

        # end of file
        self.assertRaises(StopIteration, merged.next)
Example #4
0
    def runTest(self):
        """Check ``SummarizeEntries`` with and without an extra filter.

        The first entry of the track is summarized twice: once with a plain
        ``SummarizeEntries`` and once with one that received
        ``add_filter(2, "forward", "__quality_f__", "__depth_f__")``. The
        resulting nested mappings are compared against fixed totals.
        """
        plain_summary = naivetrack.SummarizeEntries()
        strand_summary = naivetrack.SummarizeEntries()
        strand_summary.add_filter(2, "forward", "__quality_f__",
                                  "__depth_f__")

        parser = naivetrack.TrackParser(chromosome_list=self.chromosome_list)
        parser.register_handle(self.file)
        parser.set_maximum_chromosome_size(500000)

        first = parser.next()
        self.assertEqual(first.position.coordinate, 1)
        self.assertEqual(first.position.sequence, 'T')
        self.assertEqual(first.position.get_chromosome(), 'chr1')
        self.assertEqual(len(first.alleles), 4)

        # plain summary: (first key, second key, quality, depth)
        dsum = plain_summary(first)
        expected_plain = [
            ('T', 'X', 20, 10),
            ('G', 'X', 40, 4),
            ('G', 'Y', 200, 20),
        ]
        for outer, inner, quality, depth in expected_plain:
            self.assertEqual(dsum[outer][inner]['__quality__'], quality)
            self.assertEqual(dsum[outer][inner]['__depth__'], depth)

        # summary with the extra filter:
        # (first key, second key, quality, quality_f, depth, depth_f)
        strandsum = strand_summary(first)
        expected_strand = [
            ('T', 'X', 20, 20, 10, 10),
            ('G', 'X', 40, 20, 4, 1),
            ('G', 'Y', 200, 200, 20, 20),
        ]
        for outer, inner, quality, quality_f, depth, depth_f in expected_strand:
            self.assertEqual(strandsum[outer][inner]['__quality__'], quality)
            self.assertEqual(strandsum[outer][inner]['__quality_f__'],
                             quality_f)
            self.assertEqual(strandsum[outer][inner]['__depth__'], depth)
            self.assertEqual(strandsum[outer][inner]['__depth_f__'], depth_f)

        second = parser.next()
        self.assertEqual(second.position.coordinate, 20000)
        self.assertEqual(second.position.sequence, 'T')
        self.assertEqual(second.position.get_chromosome(), 'chr1')
        self.assertEqual(len(second.alleles), 1)

        # end of file
        self.assertRaises(StopIteration, parser.next)
Example #5
0
    def main():
        """Perform the main program loop.

        Parse the command line, register the requested inputs on a
        ``naivetrack.TrackParser``, collect the filter options in a
        ``naivetrack.SampleFilter`` and hand the (optionally filtered)
        provider to ``naivetrack_to_annoinput`` together with a
        ``table.VariantWriter`` on the chosen output.

        Options handled here:
            -i, --input FILE    register FILE as input; "-" registers stdin.
                                Every occurrence registers another handle.
            -o, --output FILE   write to FILE instead of stdout (the last
                                occurrence wins).
            --minimum-quality, --maximum-quality, --minimum-depth,
            --maximum-depth VALUE
                                integer bounds passed to ``add_value_filter``.
            --minimum-quality-frequency, --maximum-quality-frequency,
            --minimum-depth-frequency, --maximum-depth-frequency VALUE
                                float bounds passed to
                                ``add_frequency_filter``.
            --remove-N          set ``remove_N_alleles`` on the filter.
            --no-reference      set ``remove_all_reference`` on the filter.
            -h, --help          call ``usage`` with error code 0.

        Unknown options and option values that cannot be converted to a
        number are reported through ``usage`` with error code 101.
        """
        # prepare data parsing
        chromosome_list = []
        provider = naivetrack.TrackParser(chromosome_list=chromosome_list)
        provider.set_maximum_chromosome_size(3000000000)

        # prepare the filter
        fltr = naivetrack.SampleFilter()
        fout = sys.stdout

        # parse the commandline options
        shortopt = "i:o:h"
        longopt = [
            "input=", "output=", "help", "minimum-quality=",
            "maximum-quality=", "minimum-quality-frequency=",
            "maximum-quality-frequency=", "minimum-depth=", "maximum-depth=",
            "remove-N", "no-reference", "minimum-depth-frequency=",
            "maximum-depth-frequency="
        ]
        try:
            opts, _ = getopt.getopt(sys.argv[1:], shortopt, longopt)
            for opt, ans in opts:
                if opt in ("-i", "--input"):
                    if ans != "-":
                        provider.register_handle(
                            open(os.path.abspath(ans), 'r'))
                    else:
                        provider.register_handle(sys.stdin)
                elif opt in ("-o", "--output"):
                    # a repeated -o must not leak the file opened earlier
                    if fout is not sys.stdout:
                        fout.close()
                    fout = open(ans, 'w')
                elif opt == "--minimum-quality":
                    fltr.add_value_filter('__qual__', lowerbound=int(ans))
                elif opt == "--maximum-quality":
                    fltr.add_value_filter('__qual__', upperbound=int(ans))
                elif opt == "--minimum-quality-frequency":
                    fltr.add_frequency_filter('__qual__',
                                              lowerbound=float(ans))
                elif opt == "--maximum-quality-frequency":
                    fltr.add_frequency_filter('__qual__',
                                              upperbound=float(ans))
                elif opt == "--minimum-depth":
                    fltr.add_value_filter('__depth__', lowerbound=int(ans))
                elif opt == "--maximum-depth":
                    fltr.add_value_filter('__depth__', upperbound=int(ans))
                elif opt == "--minimum-depth-frequency":
                    fltr.add_frequency_filter('__depth__',
                                              lowerbound=float(ans))
                elif opt == "--maximum-depth-frequency":
                    fltr.add_frequency_filter('__depth__',
                                              upperbound=float(ans))
                elif opt == "--remove-N":
                    fltr.remove_N_alleles = True
                elif opt == "--no-reference":
                    fltr.remove_all_reference = True
                elif opt in ('-h', "--help"):
                    usage("Help was asked", error=0)
        except getopt.GetoptError as err:
            usage(str(err), error=101)
        except ValueError as err:
            # int()/float() rejected an option value: report it like any
            # other command line error instead of dying with a traceback
            usage("invalid value %r for option %s: %s" % (ans, opt, err),
                  error=101)

        # register filters
        if fltr.n_filters_defined() > 0:
            provider = naivetrack.apply_filters(provider=provider,
                                                filters=[fltr])

        # prepare the output writer
        outwriter = table.VariantWriter(fout, chromosome_list)

        # process the variants
        naivetrack_to_annoinput(provider, writer=outwriter)

        # Release the writer before closing the file it was given.
        # NOTE(review): assumes VariantWriter has finished writing once it
        # has been deleted -- confirm against table.VariantWriter.
        del outwriter
        if fout is not sys.stdout:
            fout.close()