def test_ignore_equal(self):
        """Names passed via ``ignore`` are tokenized but not stored as terms."""
        parser = AttributesParser('=', ignore=['chromosome', 'junk'])
        line = 'chromosome=1;ID=id0;junk=drawer'

        # The tokenizer does not filter: all three key/value pairs show up.
        tokens = list(parser.tokenize(line))
        expected_tokens = [
            'chromosome', '=', '1', ';',
            'ID', '=', 'id0', ';',
            'junk', '=', 'drawer',
        ]
        self.assertEqual(tokens, expected_tokens)

        # Parsing reports one attribute, and ``ID`` is the one stored.
        count = parser(line)
        self.assertEqual(count, 1)
        self.assertIn('ID', parser.terms)
        self.assertEqual(parser.terms['ID'][0], 'id0')

        # The ignored name is absent from ``terms`` and its ``reserved``
        # entry is 0.
        self.assertNotIn('chromosome', parser.terms)
        self.assertEqual(parser.reserved['chromosome'], 0)
 def test_numeric_gtf(self):
     """An unquoted numeric value is stored as an ``int``."""
     parser = AttributesParser()
     count = parser('exon_number 1')
     self.assertEqual(count, 1)

     self.assertIn('exon_number', parser.terms)
     stored = parser.terms['exon_number']
     self.assertEqual(stored, {0: 1})
     self.assertIsInstance(stored[0], int)
 def test_simple_gtf(self):
     """A double-quoted value is stored as a ``str`` without its quotes."""
     parser = AttributesParser()
     count = parser('gene_id "gene1"')
     self.assertEqual(count, 1)

     self.assertIn('gene_id', parser.terms)
     stored = parser.terms['gene_id']
     self.assertEqual(stored, {0: 'gene1'})
     self.assertIsInstance(stored[0], str)
    def test_multi_value_gff(self):
        """Three ``key=value`` pairs are parsed with the expected types."""
        parser = AttributesParser('=')
        count = parser('gene_id=gene1;transcript_id=transcript1;exon_number=1')
        self.assertEqual(count, 3)

        # Key under which each value is stored; presumably the index of the
        # parsed record (first call -> 0) -- confirm against AttributesParser.
        record_index = 0
        # (attribute name, expected value, expected Python type)
        cases = (
            ('gene_id', 'gene1', str),
            ('transcript_id', 'transcript1', str),
            ('exon_number', 1, int),
        )
        for key, value, kind in cases:
            self.assertIn(key, parser.terms)
            self.assertEqual(parser.terms[key], {record_index: value})
            self.assertIsInstance(parser.terms[key][record_index], kind)
    def test_grcm38(self):
        """Parse an eight-attribute GFF record and check stored terms."""
        parser = AttributesParser('=')
        record = 'ID=id0;Dbxref=taxon:10090;Name=1;chromosome=1;gbkey=Src;genome=chromosome;mol_type=genomic DNA;strain=C57BL/6J'
        count = parser(record)
        self.assertEqual(count, 8)

        # Key under which each value is stored; presumably the index of the
        # parsed record (first call -> 0) -- confirm against AttributesParser.
        record_index = 0
        # (attribute name, expected value, expected Python type)
        # NOTE(review): the record's ``chromosome`` attribute is looked up as
        # ``chromosome1`` -- presumably the parser renames it; confirm.
        # NOTE(review): ``strain`` is one of the 8 counted attributes but is
        # not checked below.
        cases = (
            ('ID', 'id0', str),
            ('Dbxref', 'taxon:10090', str),
            ('Name', 1, int),
            ('chromosome1', 1, int),
            ('gbkey', 'Src', str),
            ('genome', 'chromosome', str),
            ('mol_type', 'genomic DNA', str),
        )
        for key, value, kind in cases:
            self.assertIn(key, parser.terms)
            self.assertEqual(parser.terms[key], {record_index: value})
            self.assertIsInstance(parser.terms[key][record_index], kind)
 def test_gff_extra_semicolons(self):
     """A doubled ``;`` and surrounding blanks add no extra tokens."""
     parser = AttributesParser('=')
     tokens = list(parser.tokenize(' a=1;; b="b"; '))
     expected_tokens = ['a', '=', '1', ';', 'b', '=', '"b"', ';']
     self.assertEqual(tokens, expected_tokens)
 def test_gtf_extra_spaces(self):
     """Runs of spaces yield a single ``' '`` separator token."""
     parser = AttributesParser(' ')
     tokens = list(parser.tokenize(' a 1;    b   "b"; '))
     expected_tokens = ['a', ' ', '1', ';', 'b', ' ', '"b"', ';']
     self.assertEqual(tokens, expected_tokens)
 def test_empty(self):
     """Parsing an empty string reports zero attributes."""
     parser = AttributesParser()
     count = parser('')
     self.assertEqual(count, 0)
 def test_embedded_sep(self):
     """A ``;`` inside double quotes stays part of the value token."""
     parser = AttributesParser(' ')
     tokens = list(parser.tokenize('a "b;c"; d 1'))
     expected_tokens = ['a', ' ', '"b;c"', ';', 'd', ' ', '1']
     self.assertEqual(tokens, expected_tokens)
 def test_quoting_missing(self):
     """Consuming the tokens of an unterminated quote raises ValueError."""
     parser = AttributesParser(' ')
     # tokenize() is called outside the guarded region on purpose: only
     # exhausting its result via list() is expected to raise.
     tokens = parser.tokenize('a "b')
     with self.assertRaises(ValueError):
         list(tokens)
 def test_gff_value_with_spaces(self):
     """With ``=`` as separator, a value containing a space is one token."""
     parser = AttributesParser('=')
     tokens = list(parser.tokenize('mol_type=genomic DNA'))
     expected_tokens = ['mol_type', '=', 'genomic DNA']
     self.assertEqual(tokens, expected_tokens)