def testReadKoFile(koFile):
    """
    Spot-check the maps readKOFile builds from koFile.

    The 'PATHWAY' map must not contain K00397 and must list two pathways
    for K00399; the 'EC' map must hold one EC number for each of the two.
    """
    # KO -> pathway names
    pathway_map = readKOFile(koFile, 'PATHWAY')
    assert 'K00397' not in pathway_map
    expected_pathways = [
        'ko00680 Methane metabolism',
        'ko01200 Carbon metabolism',
    ]
    myAssertEq(pathway_map['K00399'], expected_pathways)

    # KO -> EC numbers
    ec_map = readKOFile(koFile, 'EC')
    expected_ecs = (
        ('K00397', ['EC:1.8.99.-']),
        ('K00399', ['EC:2.8.4.1']),
    )
    for ko, ec_numbers in expected_ecs:
        myAssertEq(ec_map[ko], ec_numbers)
def testReadKoFile(koFile):
    """Check a few known 'PATHWAY' and 'EC' entries parsed by readKOFile."""

    def check_entry(mapping, ko, expected):
        # Compare one KO's parsed value list against the expected list.
        myAssertEq(mapping[ko], expected)

    ko_to_pathways = readKOFile(koFile, 'PATHWAY')
    # K00397 is expected to have no PATHWAY entry at all
    assert not ('K00397' in ko_to_pathways)
    check_entry(
        ko_to_pathways,
        'K00399',
        ['ko00680 Methane metabolism', 'ko01200 Carbon metabolism'],
    )

    ko_to_ecs = readKOFile(koFile, 'EC')
    check_entry(ko_to_ecs, 'K00397', ['EC:1.8.99.-'])
    check_entry(ko_to_ecs, 'K00399', ['EC:2.8.4.1'])
def testReadKeggFile(keggFile):
    """
    Spot-check parse_keg_file on keggFile for every requested key type.

    Covers 'DESCRIPTION', 'PATHWAY', level 2 and level 3, and finally
    verifies that asking for level 3 as the string '3' gives the same
    values as asking for the integer 3.
    """
    # DESCRIPTION: a single string per KO
    descriptions = parse_keg_file(keggFile, 'DESCRIPTION')
    myAssertEq(
        descriptions['K01623'],
        'ALDO; fructose-bisphosphate aldolase, class I [EC:4.1.2.13]',
    )

    # PATHWAY: membership for two KOs, full list for a third
    pathways = parse_keg_file(keggFile, 'PATHWAY')
    for ko in ('K04519', 'K15634'):
        assert ko in pathways
    myAssertEq(
        pathways['K03011'],
        [
            '00230 Purine metabolism',
            '00240 Pyrimidine metabolism',
            '03020 RNA polymerase',
            "05016 Huntington's disease",
            '05169 Epstein-Barr virus infection',
        ],
    )

    # level 2: the first two classes, compared case-insensitively
    level2 = parse_keg_file(keggFile, 2)
    level2_expected = (
        ('K13810', ('Carbohydrate Metabolism', 'Overview')),
        ('K00399', ('Energy Metabolism', 'Overview')),
    )
    for ko, class_names in level2_expected:
        for position, class_name in enumerate(class_names):
            myAssertEq(level2[ko][position].lower(), class_name.lower())

    # level 3: complete lists
    level3 = parse_keg_file(keggFile, 3)
    level3_expected = (
        ('K13810', [
            '00010 Glycolysis / Gluconeogenesis [PATH:ko00010]',
            '00030 Pentose phosphate pathway [PATH:ko00030]',
            '00500 Starch and sucrose metabolism [PATH:ko00500]',
            '00520 Amino sugar and nucleotide sugar metabolism '
            '[PATH:ko00520]',
            '01230 Biosynthesis of amino acids [PATH:ko01230]',
        ]),
        ('K00399', [
            '00680 Methane metabolism [PATH:ko00680]',
            '01200 Carbon metabolism [PATH:ko01200]',
        ]),
        ('K03404', [
            '00860 Porphyrin and chlorophyll metabolism [PATH:ko00860]',
        ]),
    )
    for ko, classes in level3_expected:
        myAssertEq(level3[ko], classes)

    # the level given as a string must behave like the integer level
    level3_from_str = parse_keg_file(keggFile, '3')
    for ko in level3:
        try:
            myAssertEq(level3[ko], level3_from_str[ko])
        except AssertionError:
            # re-raise with a message that names the offending KO
            details = (ko, level3[ko], level3_from_str[ko])
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s" % details)
def testParseGeneLink(koFile):
    """
    Check that parseLinkFile maps a handful of known gene ids to the
    expected single-element KO lists.
    """
    gene_to_kos = parseLinkFile(koFile)

    # (gene id, the one KO expected for it), checked in this order
    known_links = (
        ('ggo:101148121', 'K16534'),
        ('olu:OSTLU_15108', 'K11126'),
        ('ebt:EBL_c03070', 'K02879'),
        ('pec:W5S_4205', 'K00363'),
        ('buc:BU148', 'K03101'),
        ('smaf:D781_0330', 'K06925'),
        ('nkr:NKOR_05565', 'K03524'),
    )
    for gene, ko in known_links:
        myAssertEq(gene_to_kos[gene], [ko])
def test():
    """
    Self-test for filterM8Stream using ten canned hit lines for one read.

    Three scenarios are exercised:

    * passthrough: default FilterParams, the first five output lines must
      equal the first five input lines
    * best: FilterParams(topPct=0.) must yield exactly the first two lines
    * n1: FilterParams(hitsPerRead=1) must yield exactly the first line

    Passing any command line argument switches logging to DEBUG.
    Exits via sys.exit() if a filtered stream yields too many lines.
    """
    import sys
    global myAssertEq, myAssertIs
    from edl.test import myAssertEq, myAssertIs

    loglevel = logging.DEBUG if len(sys.argv) > 1 else logging.WARN
    logging.basicConfig(stream=sys.stderr, level=loglevel)

    # NOTE(review): separators inside these records are reproduced as the
    # single spaces seen in review; confirm against the file on disk that
    # they were not originally tab characters.
    m8data = [
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|91763278|ref|ZP_01265242.1| Peptidase family M48 [Candidatus Pelagibacter ubique HTCC1002] 61.7021276595745 94 282 1 57 150 134 4e-30 0.992957746478873\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|71083682|ref|YP_266402.1| M48 family peptidase [Candidatus Pelagibacter ubique HTCC1062] 61.7021276595745 94 282 1 40 133 134 4e-30 0.992957746478873\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|262277211|ref|ZP_06055004.1| peptidase family M48 family [alpha proteobacterium HIMB114] 65.9090909090909 88 264 1 63 150 132 9e-30 0.929577464788732\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|254456035|ref|ZP_05069464.1| peptidase family M48 [Candidatus Pelagibacter sp. HTCC7211] 66.2790697674419 86 258 1 65 150 132 2e-29 0.908450704225352\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|118581678|ref|YP_902928.1| peptidase M48, Ste24p [Pelobacter propionicus DSM 2379] 51.6129032258064 93 282 4 53 144 108 2e-22 0.982394366197183\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|255534285|ref|YP_003094656.1| zn-dependent protease with chaperone function [Flavobacteriaceae bacterium 3519-10] 47.3118279569892 93 282 4 55 146 102 1e-20 0.982394366197183\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|317502588|ref|ZP_07960709.1| M48B family peptidase [Prevotella salivae DSM 15606] 51.6129032258064 93 279 1 85 176 100 7e-20 0.982394366197183\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|325104752|ref|YP_004274406.1| peptidase M48 Ste24p [Pedobacter saltans DSM 12145] 48.3870967741936 93 279 1 59 150 100 7e-20 0.982394366197183\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|256425464|ref|YP_003126117.1| peptidase M48 Ste24p [Chitinophaga pinensis DSM 2588] 48.8888888888889 90 273 4 58 146 99.8 9e-20 0.950704225352113\n",
        "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3 gi|299142895|ref|ZP_07036022.1| peptidase, M48 family [Prevotella oris C735] 50 94 282 1 58 150 99.4 1e-19 0.992957746478873\n",
    ]

    # test1 passthrough
    logging.info("Starting passthrough test")
    outs = filterM8Stream(iter(m8data), FilterParams())
    # The builtin next() is used instead of the Python 2-only .next()
    # method so the test also runs on Python 3.
    for expected_line in m8data[:5]:
        myAssertEq(next(outs), expected_line)

    logging.info("Starting best test")
    outs = filterM8Stream(iter(m8data), FilterParams(topPct=0.))
    myAssertEq(next(outs), m8data[0])
    myAssertEq(next(outs), m8data[1])
    try:
        next(outs)
    except StopIteration:
        pass
    else:
        # a third line came through: the filter kept too much
        sys.exit("There should only be 2 elements!")

    logging.info("Starting n1 test")
    outs = filterM8Stream(iter(m8data), FilterParams(hitsPerRead=1))
    myAssertEq(next(outs), m8data[0])
    try:
        next(outs)
    except StopIteration:
        pass
    else:
        sys.exit("There should only be 1 elements!")
def test_gff():
    """
    Parse three GFF lines with Hit(line, 'gff') and check, for each, the
    read name, one numeric attribute (score or evalue) and the hit name.
    """
    setup_tests()

    # (gff line, read, numeric attribute to check, its value, hit name)
    cases = (
        (
            'KM282-20-02b-5_c283151\tcsearch\ttRNA\t303\t233\t'
            '40.6\t-\t.\tTarget=RF00005 2 70\n',
            'KM282-20-02b-5_c283151', 'score', 40.6, 'RF00005',
        ),
        (
            'KM282-20-02a-100_c12273\tbarrnap:0.7\trRNA\t9\t772\t'
            '6.8e-41\t+\t.\tName=12S_rRNA;product=12S ribosomal RNA\n',
            'KM282-20-02a-100_c12273', 'evalue', 6.8e-41, '12S_rRNA',
        ),
        (
            'KM282-20-02a-100_c1\tProdigal_v2.6.2\tCDS\t309\t686\t'
            '53.3\t-\t0\tID=1_2;partial=00;start_type=ATG;rbs_motif='
            'AGGA;rbs_spacer=5-10bp;gc_cont=0.381;conf=100.00;score='
            '54.54;cscore=45.68;sscore=8.86;rscore=5.50;uscore=-0.63;'
            'tscore=2.76;\n',
            'KM282-20-02a-100_c1', 'score', 53.3, '1_2',
        ),
    )

    for gff_line, read_name, number_attr, number, hit_name in cases:
        parsed = Hit(gff_line, 'gff')
        myAssertEq(parsed.read, read_name)
        myAssertEq(getattr(parsed, number_attr), number)
        myAssertEq(parsed.hit, hit_name)
def test():
    """
    Run filterM8Stream over ten canned hit lines for a single read.

    * passthrough: default FilterParams with returnLines=True, the first
      five outputs must equal the first five inputs
    * best: FilterParams(top_pct=0.) must yield exactly the first two lines
    * n1: FilterParams(hits_per_read=1) must yield exactly the first line

    Exits via sys.exit() if a filtered stream yields too many items.
    """
    setup_tests()

    # every record starts with the same read description
    read = "001598_1419_3101 H186x25M length=284 uaccno=E3N7QM101DQXE3"
    hit_columns = (
        " gi|91763278|ref|ZP_01265242.1| Peptidase family M48"
        " [Candidatus Pelagibacter ubique HTCC1002]"
        " 61.7021276595745 94 282 1 57 150 134 4e-30"
        " 0.992957746478873\n",
        " gi|71083682|ref|YP_266402.1| M48 family peptidase"
        " [Candidatus Pelagibacter ubique HTCC1062]"
        " 61.7021276595745 94 282 1 40 133 134 4e-30"
        " 0.992957746478873\n",
        " gi|262277211|ref|ZP_06055004.1| peptidase family M48 family"
        " [alpha proteobacterium HIMB114]"
        " 65.9090909090909 88 264 1 63 150 132 9e-30"
        " 0.929577464788732\n",
        " gi|254456035|ref|ZP_05069464.1| peptidase family M48"
        " [Candidatus Pelagibacter sp. HTCC7211]"
        " 66.2790697674419 86 258 1 65 150 132 2e-29"
        " 0.908450704225352\n",
        " gi|118581678|ref|YP_902928.1| peptidase M48, Ste24p"
        " [Pelobacter propionicus DSM 2379]"
        " 51.6129032258064 93 282 4 53 144 108 2e-22"
        " 0.982394366197183\n",
        " gi|255534285|ref|YP_003094656.1| zn-dependent protease"
        " with chaperone function [Flavobacteriaceae bacterium 3519-10]"
        " 47.3118279569892 93 282 4 55 146 102 1e-20"
        " 0.982394366197183\n",
        " gi|317502588|ref|ZP_07960709.1| M48B family peptidase"
        " [Prevotella salivae DSM 15606]"
        " 51.6129032258064 93 279 1 85 176 100 7e-20"
        " 0.982394366197183\n",
        " gi|325104752|ref|YP_004274406.1| peptidase M48 Ste24p"
        " [Pedobacter saltans DSM 12145]"
        " 48.3870967741936 93 279 1 59 150 100 7e-20"
        " 0.982394366197183\n",
        " gi|256425464|ref|YP_003126117.1| peptidase M48 Ste24p"
        " [Chitinophaga pinensis DSM 2588]"
        " 48.8888888888889 90 273 4 58 146 99.8 9e-20"
        " 0.950704225352113\n",
        " gi|299142895|ref|ZP_07036022.1| peptidase, M48 family"
        " [Prevotella oris C735]"
        " 50 94 282 1 58 150 99.4 1e-19"
        " 0.992957746478873\n",
    )
    m8data = [read + columns for columns in hit_columns]

    # test1 passthrough
    logging.info("Starting passthrough test")
    unfiltered = filterM8Stream(iter(m8data), FilterParams(),
                                returnLines=True)
    for expected_line in m8data[:5]:
        myAssertEq(next(unfiltered), expected_line)

    logging.info("Starting best test")
    best_only = filterM8Stream(iter(m8data), FilterParams(top_pct=0.))
    for expected_line in m8data[:2]:
        myAssertEq(next(best_only), expected_line)
    try:
        next(best_only)
    except StopIteration:
        pass
    else:
        # a third item came through: the filter kept too much
        sys.exit("There should only be 2 elements!")

    logging.info("Starting n1 test")
    single_hit = filterM8Stream(iter(m8data),
                                FilterParams(hits_per_read=1))
    myAssertEq(next(single_hit), m8data[0])
    try:
        next(single_hit)
    except StopIteration:
        pass
    else:
        sys.exit("There should only be 1 elements!")
def testReadKeggFile(keggFile):
    """
    Spot-check readKeggFile on keggFile for 'DESCRIPTION', 'PATHWAY',
    level 2 and level 3, then confirm that level '3' (string) returns the
    same values as level 3 (integer) for every key.
    """
    # DESCRIPTION
    descriptions = readKeggFile(keggFile, 'DESCRIPTION')
    myAssertEq(descriptions['K09630'],
               ['PRSS36; protease, serine, 36 [EC:3.4.21.-]'])

    # PATHWAY
    pathways = readKeggFile(keggFile, 'PATHWAY')
    assert 'K00397' in pathways
    myAssertEq(pathways['K00399'],
               ['01200 Carbon metabolism',
                '00680 Methane metabolism',
                '01000 Enzymes'])

    # level 2: (KO, ((index, expected class), ...)), case-insensitive
    level2 = readKeggFile(keggFile, 2)
    level2_expected = (
        ('K13810', ((1, 'Carbohydrate Metabolism'), (0, 'Overview'))),
        ('K00399', ((1, 'Energy Metabolism'), (0, 'Overview'))),
    )
    for ko, indexed_names in level2_expected:
        for index, class_name in indexed_names:
            myAssertEq(level2[ko][index].lower(), class_name.lower())

    # level 3: complete lists, checked in this order
    level3 = readKeggFile(keggFile, 3)
    level3_expected = (
        ('K13810', ['01230 Biosynthesis of amino acids',
                    '00010 Glycolysis / Gluconeogenesis',
                    '00030 Pentose phosphate pathway',
                    '00500 Starch and sucrose metabolism',
                    '00520 Amino sugar and nucleotide sugar metabolism',
                    '01000 Enzymes',
                    '01000 Enzymes']),
        ('K00399', ['01200 Carbon metabolism',
                    '00680 Methane metabolism',
                    '01000 Enzymes']),
        ('K03404', ['00860 Porphyrin and chlorophyll metabolism',
                    '01000 Enzymes']),
        ('K01976', ['01000 Enzymes']),
        ('K07347', ['02000 Transporters',
                    '02044 Secretion system',
                    '02035 Bacterial motility proteins',
                    '05133 Pertussis']),
        ('K09630', ['01000 Enzymes', '01002 Peptidases']),
    )
    for ko, classes in level3_expected:
        myAssertEq(level3[ko], classes)

    # string level must match integer level
    level3_from_str = readKeggFile(keggFile, '3')
    for ko in level3:
        try:
            myAssertEq(level3[ko], level3_from_str[ko])
        except AssertionError:
            # re-raise with a message that names the offending KO
            details = (ko, level3[ko], level3_from_str[ko])
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s" % details)
def testReadKeggFile(keggFile):
    """
    Verify known entries of the maps readKeggFile returns for the
    'DESCRIPTION', 'PATHWAY', 2 and 3 key types, and that the string '3'
    is treated the same as the integer 3.
    """

    def check(mapping, ko, expected):
        # Compare the whole value stored for one KO.
        myAssertEq(mapping[ko], expected)

    def check_ignoring_case(mapping, ko, index, expected):
        # Compare one element of a KO's value list, case-insensitively.
        myAssertEq(mapping[ko][index].lower(), expected.lower())

    by_description = readKeggFile(keggFile, 'DESCRIPTION')
    check(by_description, 'K09630',
          ['PRSS36; protease, serine, 36 [EC:3.4.21.-]'])

    by_pathway = readKeggFile(keggFile, 'PATHWAY')
    assert 'K00397' in by_pathway
    check(by_pathway, 'K00399', [
        '01200 Carbon metabolism',
        '00680 Methane metabolism',
        '01000 Enzymes',
    ])

    by_level2 = readKeggFile(keggFile, 2)
    check_ignoring_case(by_level2, 'K13810', 1, 'Carbohydrate Metabolism')
    check_ignoring_case(by_level2, 'K13810', 0, 'Overview')
    check_ignoring_case(by_level2, 'K00399', 1, 'Energy Metabolism')
    check_ignoring_case(by_level2, 'K00399', 0, 'Overview')

    by_level3 = readKeggFile(keggFile, 3)
    check(by_level3, 'K13810', [
        '01230 Biosynthesis of amino acids',
        '00010 Glycolysis / Gluconeogenesis',
        '00030 Pentose phosphate pathway',
        '00500 Starch and sucrose metabolism',
        '00520 Amino sugar and nucleotide sugar metabolism',
        '01000 Enzymes',
        '01000 Enzymes',
    ])
    check(by_level3, 'K00399', [
        '01200 Carbon metabolism',
        '00680 Methane metabolism',
        '01000 Enzymes',
    ])
    check(by_level3, 'K03404', [
        '00860 Porphyrin and chlorophyll metabolism',
        '01000 Enzymes',
    ])
    check(by_level3, 'K01976', ['01000 Enzymes'])
    check(by_level3, 'K07347', [
        '02000 Transporters',
        '02044 Secretion system',
        '02035 Bacterial motility proteins',
        '05133 Pertussis',
    ])
    check(by_level3, 'K09630', ['01000 Enzymes', '01002 Peptidases'])

    # asking for level '3' must give the same classes as level 3
    by_level3_str = readKeggFile(keggFile, '3')
    for ko in by_level3:
        try:
            check(by_level3_str, ko, by_level3[ko])
        except AssertionError:
            # re-raise with a message that names the offending KO
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s"
                % (ko, by_level3[ko], by_level3_str[ko]))