コード例 #1
0
    def test_indent_splitter(self):
        """indent_splitter should split lines at correct locations"""
        #if lines have same indent, should not group together
        lines = ['abc    xxx', 'def    yyy']
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0]], [lines[1]]])
        #if second line is indented, should group with first
        lines = ['abc    xxx', ' def    yyy']
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0], lines[1]]])

        #if both lines indented but second is more, should group with first
        lines = [' abc    xxx', '  def    yyy']
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0], lines[1]]])

        #if both lines indented equally, should not group
        lines = ['   abc    xxx', '   def    yyy']
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]], [lines[1]]])

        #for more complex situation, should produce correct grouping
        lines = [
            '  xyz',  #0 -
            '  xxx',  #1 -
            '   yyy',  #2
            '   uuu',  #3
            '   iii',  #4
            '  qaz',  #5 -
            '  wsx',  #6 -
            '   az',  #7
            '   sx',  #8
            '        gb',  #9
            '   bg',  #10
            '  aaa',  #11 -
        ]
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]], lines[1:5], [lines[5]], lines[6:11], [lines[11]]])

        #real example from genbank file
        lines = \
"""LOCUS       NT_016354           92123751 bp    DNA     linear   CON 29-AUG-2006
DEFINITION  H**o sapiens chromosome 4 genomic contig, reference assembly.
ACCESSION   NT_016354 NT_006109 NT_006204 NT_006245 NT_006302 NT_006371
            NT_006397 NT_016393 NT_016589 NT_016599 NT_016606 NT_022752
            NT_022753 NT_022755 NT_022760 NT_022774 NT_022797 NT_022803
            NT_022846 NT_022960 NT_025694 NT_028147 NT_029273 NT_030643
            NT_030646 NT_030662 NT_031780 NT_031781 NT_031791 NT_034703
            NT_034705 NT_037628 NT_037629 NT_079512
VERSION     NT_016354.18  GI:88977422
KEYWORDS    .
SOURCE      H**o sapiens (human)
  ORGANISM  H**o sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; H**o.
?
REFERENCE   2  (bases 1 to 92123751)
  AUTHORS   International Human Genome Sequencing Consortium.
  TITLE     Finishing the euchromatic sequence of the human genome""".split('\n')
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]],[lines[1]],lines[2:8],[lines[8]],[lines[9]],lines[10:15],\
            [lines[15]], lines[16:]])
コード例 #2
0
ファイル: test_genbank.py プロジェクト: Skylersun/pycogent
    def test_indent_splitter(self):
        """indent_splitter should split lines at correct locations"""
        #if lines have same indent, should not group together
        lines = [
        'abc    xxx',
        'def    yyy'
        ]
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0]], [lines[1]]])
        #if second line is indented, should group with first
        lines = [
        'abc    xxx',
        ' def    yyy'
        ]
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0], lines[1]]])
        
        #if both lines indented but second is more, should group with first
        lines = [
        ' abc    xxx',
        '  def    yyy'
        ]
        self.assertEqual(list(indent_splitter(lines)),\
            [[lines[0], lines[1]]])
        
        #if both lines indented equally, should not group
        lines = [
        '   abc    xxx',
        '   def    yyy'
        ]
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]], [lines[1]]])
        
        #for more complex situation, should produce correct grouping
        lines = [
        '  xyz',    #0 -
        '  xxx',    #1 -
        '   yyy',   #2
        '   uuu',   #3
        '   iii',   #4
        '  qaz',    #5 -
        '  wsx',    #6 -
        '   az',    #7
        '   sx',    #8
        '        gb',#9
        '   bg',    #10
        '  aaa',    #11 -
        ]
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]], lines[1:5], [lines[5]], lines[6:11], [lines[11]]])
        
        #real example from genbank file
        lines = \
"""LOCUS       NT_016354           92123751 bp    DNA     linear   CON 29-AUG-2006
DEFINITION  H**o sapiens chromosome 4 genomic contig, reference assembly.
ACCESSION   NT_016354 NT_006109 NT_006204 NT_006245 NT_006302 NT_006371
            NT_006397 NT_016393 NT_016589 NT_016599 NT_016606 NT_022752
            NT_022753 NT_022755 NT_022760 NT_022774 NT_022797 NT_022803
            NT_022846 NT_022960 NT_025694 NT_028147 NT_029273 NT_030643
            NT_030646 NT_030662 NT_031780 NT_031781 NT_031791 NT_034703
            NT_034705 NT_037628 NT_037629 NT_079512
VERSION     NT_016354.18  GI:88977422
KEYWORDS    .
SOURCE      H**o sapiens (human)
  ORGANISM  H**o sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; H**o.
?
REFERENCE   2  (bases 1 to 92123751)
  AUTHORS   International Human Genome Sequencing Consortium.
  TITLE     Finishing the euchromatic sequence of the human genome""".split('\n')
        self.assertEqual(list(indent_splitter(lines)), \
            [[lines[0]],[lines[1]],lines[2:8],[lines[8]],[lines[9]],lines[10:15],\
            [lines[15]], lines[16:]])
コード例 #3
0
        l3_a = Location(10)
        l3_b = Location(12)
        l3 = Location([l3_a, l3_b])
        ll = LocationList([l, l2, l3])
        s = ll.extract('ACGTGCAGTCAGTAGCAT')
        #               123456789012345678
        self.assertEqual(s, 'G' + 'TGC' + 'CAG')
        #check a case where it wraps around
        l5_a = Location(16)
        l5_b = Location(4)
        l5 = Location([l5_a, l5_b])
        ll = LocationList([l5])
        s = ll.extract('ACGTGCAGTCAGTAGCAT')
        self.assertEqual(s, 'CATACGT')


if __name__ == '__main__':
    # Script entry point. Invoked as `<script> x <filename>` it dumps the
    # two-level indent grouping of the named file; otherwise it calls main().
    from sys import argv
    if len(argv) > 2 and argv[1] == 'x':
        filename = argv[2]
        # The original left this handle open. The loop stays inside the
        # `with` so the file remains readable for as long as indent_splitter
        # pulls lines from it, and is closed afterwards even on error.
        with open(filename) as lines:
            for i in indent_splitter(lines):
                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print('******')
                print(i[0])
                # Re-split everything below the group's first line to show
                # the nested structure.
                for j in indent_splitter(i[1:]):
                    print('?????')
                    for line in j:
                        print(line)
    else:
        main()
コード例 #4
0
ファイル: test_genbank.py プロジェクト: Skylersun/pycogent
        l3_a = Location(10)
        l3_b = Location(12)
        l3 = Location([l3_a, l3_b])
        ll = LocationList([l, l2, l3])
        s = ll.extract('ACGTGCAGTCAGTAGCAT')
        #               123456789012345678
        self.assertEqual(s, 'G'+'TGC'+'CAG')
        #check a case where it wraps around
        l5_a = Location(16)
        l5_b = Location(4)
        l5 = Location([l5_a,l5_b])
        ll = LocationList([l5])
        s = ll.extract('ACGTGCAGTCAGTAGCAT')
        self.assertEqual(s, 'CATACGT')

if __name__ == '__main__':
    # Script entry point: `<script> x <filename>` prints the file's indent
    # groups (and the sub-groups inside each); any other invocation calls
    # main().
    from sys import argv
    if len(argv) > 2 and argv[1] == 'x':
        filename = argv[2]
        # Open via a context manager so the handle is always released; the
        # original never closed it. Iteration happens inside the `with`
        # because indent_splitter reads from the open file.
        with open(filename) as lines:
            for i in indent_splitter(lines):
                # print(...) with one argument behaves the same on Python 2
                # and Python 3.
                print('******')
                print(i[0])
                # Split the remainder of the group again for its sub-groups.
                for j in indent_splitter(i[1:]):
                    print('?????')
                    for line in j:
                        print(line)
    else:
        main()