예제 #1
0
def testReadKoFile(koFile):
    """Check the PATHWAY and EC maps that readKOFile builds from ``koFile``.

    The PATHWAY map must not contain K00397 and must list two pathways for
    K00399; the EC map must hold a single EC entry for each of those KOs.
    """
    pathways_by_ko = readKOFile(koFile, 'PATHWAY')
    assert 'K00397' not in pathways_by_ko
    expected_pathways = [
        'ko00680  Methane metabolism',
        'ko01200  Carbon metabolism',
    ]
    myAssertEq(pathways_by_ko['K00399'], expected_pathways)

    ec_by_ko = readKOFile(koFile, 'EC')
    expected_ecs = (
        ('K00397', ['EC:1.8.99.-']),
        ('K00399', ['EC:2.8.4.1']),
    )
    for ko, ec_list in expected_ecs:
        myAssertEq(ec_by_ko[ko], ec_list)
예제 #2
0
def testReadKoFile(koFile):
    """Verify readKOFile output for the 'PATHWAY' and 'EC' fields.

    Asserts that K00397 has no PATHWAY entry, that K00399 maps to the
    methane and carbon metabolism pathways, and that both KOs carry the
    expected EC numbers.
    """
    # PATHWAY field
    ko_to_pathways = readKOFile(koFile, 'PATHWAY')
    assert 'K00397' not in ko_to_pathways
    k00399_pathways = ko_to_pathways['K00399']
    myAssertEq(
        k00399_pathways,
        ['ko00680  Methane metabolism', 'ko01200  Carbon metabolism'],
    )

    # EC field
    ko_to_ec = readKOFile(koFile, 'EC')
    k00397_ec = ko_to_ec['K00397']
    myAssertEq(k00397_ec, ['EC:1.8.99.-'])
    k00399_ec = ko_to_ec['K00399']
    myAssertEq(k00399_ec, ['EC:2.8.4.1'])
예제 #3
0
def testReadKeggFile(keggFile):
    """Check parse_keg_file results for several fields of ``keggFile``.

    Covers the 'DESCRIPTION' and 'PATHWAY' fields and hierarchy levels 2
    and 3. Level-2 names are compared case-insensitively. Finally the map
    returned for level ``3`` (int) is compared, key by key, with the one
    returned for level ``'3'`` (str).
    """
    descriptions = parse_keg_file(keggFile, 'DESCRIPTION')
    myAssertEq(descriptions['K01623'],
               'ALDO; fructose-bisphosphate aldolase, class I [EC:4.1.2.13]')

    pathways = parse_keg_file(keggFile, 'PATHWAY')
    for ko in ('K04519', 'K15634'):
        assert ko in pathways
    myAssertEq(pathways['K03011'], [
        '00230 Purine metabolism',
        '00240 Pyrimidine metabolism',
        '03020 RNA polymerase',
        "05016 Huntington's disease",
        '05169 Epstein-Barr virus infection',
    ])

    level2 = parse_keg_file(keggFile, 2)
    level2_expectations = (
        ('K13810', 0, 'Carbohydrate Metabolism'),
        ('K13810', 1, 'Overview'),
        ('K00399', 0, 'Energy Metabolism'),
        ('K00399', 1, 'Overview'),
    )
    for ko, position, label in level2_expectations:
        myAssertEq(level2[ko][position].lower(), label.lower())

    level3 = parse_keg_file(keggFile, 3)
    level3_expectations = (
        ('K13810', [
            '00010 Glycolysis / Gluconeogenesis [PATH:ko00010]',
            '00030 Pentose phosphate pathway [PATH:ko00030]',
            '00500 Starch and sucrose metabolism [PATH:ko00500]',
            '00520 Amino sugar and nucleotide sugar metabolism '
            '[PATH:ko00520]',
            '01230 Biosynthesis of amino acids [PATH:ko01230]',
        ]),
        ('K00399', [
            '00680 Methane metabolism [PATH:ko00680]',
            '01200 Carbon metabolism [PATH:ko01200]',
        ]),
        ('K03404', [
            '00860 Porphyrin and chlorophyll metabolism [PATH:ko00860]',
        ]),
    )
    for ko, classes in level3_expectations:
        myAssertEq(level3[ko], classes)

    # The level may be given as an int or as a string; both must agree.
    level3_from_str = parse_keg_file(keggFile, '3')
    for ko in level3.keys():
        from_int = level3[ko]
        from_str = level3_from_str[ko]
        try:
            myAssertEq(from_int, from_str)
        except AssertionError:
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s" %
                (ko, from_int, from_str))
예제 #4
0
def testParseGeneLink(koFile):
    """Check that parseLinkFile maps a sample of gene ids to their KOs.

    Each listed gene id must map to a one-element list holding the
    expected KO identifier.
    """
    gene_to_kos = parseLinkFile(koFile)
    expected_links = (
        ('ggo:101148121', 'K16534'),
        ('olu:OSTLU_15108', 'K11126'),
        ('ebt:EBL_c03070', 'K02879'),
        ('pec:W5S_4205', 'K00363'),
        ('buc:BU148', 'K03101'),
        ('smaf:D781_0330', 'K06925'),
        ('nkr:NKOR_05565', 'K03524'),
    )
    for gene_id, ko in expected_links:
        myAssertEq(gene_to_kos[gene_id], [ko])
예제 #5
0
def test():
    """Run ad-hoc checks of filterM8Stream on ten canned m8 hit lines.

    All ten lines belong to the same read. Three scenarios are checked:

    * default FilterParams: the first five output lines equal the first
      five input lines;
    * ``topPct=0.``: exactly the first two input lines are returned;
    * ``hitsPerRead=1``: exactly the first input line is returned.

    Passing any command-line argument switches logging to DEBUG. The
    process exits via ``sys.exit`` if a filtered stream yields more
    elements than expected.
    """
    import sys
    global myAssertEq, myAssertIs
    from edl.test import myAssertEq, myAssertIs

    if len(sys.argv) > 1:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.WARNING
    logging.basicConfig(stream=sys.stderr, level=loglevel)

    m8data = [
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|91763278|ref|ZP_01265242.1|	Peptidase family M48 [Candidatus Pelagibacter ubique HTCC1002]	61.7021276595745	94	282	1	57	150	134	4e-30	0.992957746478873\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|71083682|ref|YP_266402.1|	M48 family peptidase [Candidatus Pelagibacter ubique HTCC1062]	61.7021276595745	94	282	1	40	133	134	4e-30	0.992957746478873\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|262277211|ref|ZP_06055004.1|	peptidase family M48 family [alpha proteobacterium HIMB114]	65.9090909090909	88	264	1	63	150	132	9e-30	0.929577464788732\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|254456035|ref|ZP_05069464.1|	peptidase family M48 [Candidatus Pelagibacter sp. HTCC7211]	66.2790697674419	86	258	1	65	150	132	2e-29	0.908450704225352\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|118581678|ref|YP_902928.1|	peptidase M48, Ste24p [Pelobacter propionicus DSM 2379]	51.6129032258064	93	282	4	53	144	108	2e-22	0.982394366197183\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|255534285|ref|YP_003094656.1|	zn-dependent protease with chaperone function [Flavobacteriaceae bacterium 3519-10]	47.3118279569892	93	282	4	55	146	102	1e-20	0.982394366197183\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|317502588|ref|ZP_07960709.1|	M48B family peptidase [Prevotella salivae DSM 15606]	51.6129032258064	93	279	1	85	176	100	7e-20	0.982394366197183\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|325104752|ref|YP_004274406.1|	peptidase M48 Ste24p [Pedobacter saltans DSM 12145]	48.3870967741936	93	279	1	59	150	100	7e-20	0.982394366197183\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|256425464|ref|YP_003126117.1|	peptidase M48 Ste24p [Chitinophaga pinensis DSM 2588]	48.8888888888889	90	273	4	58	146	99.8	9e-20	0.950704225352113\n",
        "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3	gi|299142895|ref|ZP_07036022.1|	peptidase, M48 family [Prevotella oris C735]	50	94	282	1	58	150	99.4	1e-19	0.992957746478873\n",
    ]

    # test1 passthrough
    logging.info("Starting passthrough test")
    m8stream = iter(m8data)
    params = FilterParams()
    outs = filterM8Stream(m8stream, params)
    # The builtin next() works on Python 2.6+ and Python 3, whereas the
    # iterator method .next() exists only on Python 2.
    for expected in m8data[:5]:
        myAssertEq(next(outs), expected)

    logging.info("Starting best test")
    m8stream = iter(m8data)
    params = FilterParams(topPct=0.)
    outs = filterM8Stream(m8stream, params)
    myAssertEq(next(outs), m8data[0])
    myAssertEq(next(outs), m8data[1])
    try:
        next(outs)
        # Reached only if the stream produced a third element.
        sys.exit("There should only be 2 elements!")
    except StopIteration:
        pass

    logging.info("Starting n1 test")
    m8stream = iter(m8data)
    params = FilterParams(hitsPerRead=1)
    outs = filterM8Stream(m8stream, params)
    myAssertEq(next(outs), m8data[0])
    try:
        next(outs)
        # Reached only if the stream produced a second element.
        sys.exit("There should only be 1 elements!")
    except StopIteration:
        pass
예제 #6
0
def test_gff():
    """Parse three GFF lines with ``Hit(line, 'gff')`` and check fields.

    For each line the ``read`` and ``hit`` attributes are checked, plus
    either ``score`` or ``evalue`` depending on the record.
    """
    setup_tests()

    # Record whose hit name is checked against the Target= attribute value.
    trna_line = ('KM282-20-02b-5_c283151\tcsearch\ttRNA\t303\t233\t'
                 '40.6\t-\t.\tTarget=RF00005 2 70\n')
    trna_hit = Hit(trna_line, 'gff')
    myAssertEq(trna_hit.read, 'KM282-20-02b-5_c283151')
    myAssertEq(trna_hit.score, 40.6)
    myAssertEq(trna_hit.hit, 'RF00005')

    # Record whose hit name is checked against the Name= attribute value.
    rrna_line = ('KM282-20-02a-100_c12273\tbarrnap:0.7\trRNA\t9\t772\t'
                 '6.8e-41\t+\t.\tName=12S_rRNA;product=12S ribosomal RNA\n')
    rrna_hit = Hit(rrna_line, 'gff')
    myAssertEq(rrna_hit.read, 'KM282-20-02a-100_c12273')
    myAssertEq(rrna_hit.evalue, 6.8e-41)
    myAssertEq(rrna_hit.hit, '12S_rRNA')

    # Record whose hit name is checked against the ID= attribute value.
    cds_line = ('KM282-20-02a-100_c1\tProdigal_v2.6.2\tCDS\t309\t686\t'
                '53.3\t-\t0\tID=1_2;partial=00;start_type=ATG;rbs_motif='
                'AGGA;rbs_spacer=5-10bp;gc_cont=0.381;conf=100.00;score='
                '54.54;cscore=45.68;sscore=8.86;rscore=5.50;uscore=-0.63;'
                'tscore=2.76;\n')
    cds_hit = Hit(cds_line, 'gff')
    myAssertEq(cds_hit.read, 'KM282-20-02a-100_c1')
    myAssertEq(cds_hit.score, 53.3)
    myAssertEq(cds_hit.hit, '1_2')
예제 #7
0
def test():
    """Check filterM8Stream against ten canned m8 hit lines for one read.

    Scenarios:

    * default FilterParams with ``returnLines=True``: the first five
      outputs equal the first five inputs;
    * ``top_pct=0.``: only the first two inputs are returned;
    * ``hits_per_read=1``: only the first input is returned.

    The process exits via ``sys.exit`` when a filtered stream yields more
    elements than expected.
    """
    setup_tests()
    m8data = ["001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|91763278|ref|ZP_01265242.1|	Peptidase family"
              " M48 [Candidatus Pelagibacter ubique HTCC1002]"
              "	61.7021276595745	94	282	1	57	150	134	4e-30"
              "	0.992957746478873\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|71083682|ref|YP_266402.1|"
              "	M48 family peptidase [Candidatus Pelagibacter ubique"
              " HTCC1062]	61.7021276595745	94	282	1	40	133	134"
              "	4e-30	0.992957746478873\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|262277211|ref|ZP_06055004.1|	peptidase family M48 family"
              " [alpha proteobacterium HIMB114]	65.9090909090909	88	264"
              "	1	63	150	132	9e-30	0.929577464788732\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|254456035|ref|ZP_05069464.1|	peptidase family M48"
              " [Candidatus Pelagibacter sp. HTCC7211]	66.2790697674419"
              "	86	258	1	65	150	132	2e-29	0.908450704225352\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|118581678|ref|YP_902928.1|	peptidase M48, Ste24p"
              " [Pelobacter propionicus DSM 2379]	51.6129032258064	93"
              "	282	4	53	144	108	2e-22	0.982394366197183\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|255534285|ref|YP_003094656.1|	zn-dependent protease"
              " with chaperone function [Flavobacteriaceae bacterium"
              " 3519-10]	47.3118279569892	93	282	4	55	146	102"
              "	1e-20	0.982394366197183\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|317502588|ref|ZP_07960709.1|	M48B family peptidase"
              " [Prevotella salivae DSM 15606]	51.6129032258064	93"
              "	279	1	85	176	100	7e-20	0.982394366197183\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|325104752|ref|YP_004274406.1|	peptidase M48 Ste24p"
              " [Pedobacter saltans DSM 12145]	48.3870967741936	93"
              "	279	1	59	150	100	7e-20	0.982394366197183\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|256425464|ref|YP_003126117.1|	peptidase M48 Ste24p"
              " [Chitinophaga pinensis DSM 2588]	48.8888888888889"
              "	90	273	4	58	146	99.8	9e-20	0.950704225352113\n",
              "001598_1419_3101	H186x25M  length=284 uaccno=E3N7QM101DQXE3"
              "	gi|299142895|ref|ZP_07036022.1|	peptidase, M48 family"
              " [Prevotella oris C735]	50	94	282	1	58	150	99.4"
              "	1e-19	0.992957746478873\n"]

    # Scenario 1: default parameters, first five lines come back unchanged.
    logging.info("Starting passthrough test")
    filtered = filterM8Stream(iter(m8data), FilterParams(), returnLines=True)
    for expected_line in m8data[:5]:
        myAssertEq(next(filtered), expected_line)

    # Scenario 2: top_pct=0. keeps only the first two lines.
    logging.info("Starting best test")
    filtered = filterM8Stream(iter(m8data), FilterParams(top_pct=0.))
    for expected_line in m8data[:2]:
        myAssertEq(next(filtered), expected_line)
    try:
        next(filtered)
    except StopIteration:
        pass
    else:
        sys.exit("There should only be 2 elements!")

    # Scenario 3: hits_per_read=1 keeps only the first line.
    logging.info("Starting n1 test")
    filtered = filterM8Stream(iter(m8data), FilterParams(hits_per_read=1))
    myAssertEq(next(filtered), m8data[0])
    try:
        next(filtered)
    except StopIteration:
        pass
    else:
        sys.exit("There should only be 1 elements!")
예제 #8
0
def test_gff():
    """Check attributes of Hit objects built from three GFF lines.

    Every case pairs a raw GFF line with the attribute values expected on
    ``Hit(line, 'gff')``; the attributes are checked in the listed order.
    """
    setup_tests()

    cases = (
        (
            'KM282-20-02b-5_c283151\tcsearch\ttRNA\t303\t233\t'
            '40.6\t-\t.\tTarget=RF00005 2 70\n',
            (
                ('read', 'KM282-20-02b-5_c283151'),
                ('score', 40.6),
                ('hit', 'RF00005'),
            ),
        ),
        (
            'KM282-20-02a-100_c12273\tbarrnap:0.7\trRNA\t9\t772\t'
            '6.8e-41\t+\t.\tName=12S_rRNA;product=12S ribosomal RNA\n',
            (
                ('read', 'KM282-20-02a-100_c12273'),
                ('evalue', 6.8e-41),
                ('hit', '12S_rRNA'),
            ),
        ),
        (
            'KM282-20-02a-100_c1\tProdigal_v2.6.2\tCDS\t309\t686\t'
            '53.3\t-\t0\tID=1_2;partial=00;start_type=ATG;rbs_motif='
            'AGGA;rbs_spacer=5-10bp;gc_cont=0.381;conf=100.00;score='
            '54.54;cscore=45.68;sscore=8.86;rscore=5.50;uscore=-0.63;'
            'tscore=2.76;\n',
            (
                ('read', 'KM282-20-02a-100_c1'),
                ('score', 53.3),
                ('hit', '1_2'),
            ),
        ),
    )

    for gff_line, expected_attrs in cases:
        parsed = Hit(gff_line, 'gff')
        for attr_name, expected_value in expected_attrs:
            myAssertEq(getattr(parsed, attr_name), expected_value)
예제 #9
0
def testReadKeggFile(keggFile):
    """Check readKeggFile results for several fields of ``keggFile``.

    Covers the 'DESCRIPTION' and 'PATHWAY' fields and hierarchy levels 2
    and 3. Level-2 names are compared case-insensitively. Finally the map
    returned for level ``3`` (int) is compared, key by key, with the one
    returned for level ``'3'`` (str).
    """
    descriptions = readKeggFile(keggFile, 'DESCRIPTION')
    myAssertEq(descriptions['K09630'],
               ['PRSS36; protease, serine, 36 [EC:3.4.21.-]'])

    pathways = readKeggFile(keggFile, 'PATHWAY')
    assert 'K00397' in pathways
    myAssertEq(pathways['K00399'], [
        '01200 Carbon metabolism',
        '00680 Methane metabolism',
        '01000 Enzymes',
    ])

    level2 = readKeggFile(keggFile, 2)
    level2_expectations = (
        ('K13810', 1, 'Carbohydrate Metabolism'),
        ('K13810', 0, 'Overview'),
        ('K00399', 1, 'Energy Metabolism'),
        ('K00399', 0, 'Overview'),
    )
    for ko, position, label in level2_expectations:
        myAssertEq(level2[ko][position].lower(), label.lower())

    level3 = readKeggFile(keggFile, 3)
    level3_expectations = (
        ('K13810', [
            '01230 Biosynthesis of amino acids',
            '00010 Glycolysis / Gluconeogenesis',
            '00030 Pentose phosphate pathway',
            '00500 Starch and sucrose metabolism',
            '00520 Amino sugar and nucleotide sugar metabolism',
            '01000 Enzymes',
            '01000 Enzymes',
        ]),
        ('K00399', [
            '01200 Carbon metabolism',
            '00680 Methane metabolism',
            '01000 Enzymes',
        ]),
        ('K03404', [
            '00860 Porphyrin and chlorophyll metabolism',
            '01000 Enzymes',
        ]),
        ('K01976', ['01000 Enzymes']),
        ('K07347', [
            '02000 Transporters',
            '02044 Secretion system',
            '02035 Bacterial motility proteins',
            '05133 Pertussis',
        ]),
        ('K09630', ['01000 Enzymes', '01002 Peptidases']),
    )
    for ko, classes in level3_expectations:
        myAssertEq(level3[ko], classes)

    # The level may be given as an int or as a string; both must agree.
    level3_from_str = readKeggFile(keggFile, '3')
    for ko in level3.keys():
        from_int = level3[ko]
        from_str = level3_from_str[ko]
        try:
            myAssertEq(from_int, from_str)
        except AssertionError:
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s" %
                (ko, from_int, from_str))
예제 #10
0
def testParseGeneLink(koFile):
    """Verify the gene-to-KO map produced by parseLinkFile for ``koFile``.

    A handful of gene ids are looked up and each must map to exactly the
    listed KO list.
    """
    links = parseLinkFile(koFile)
    sample_genes = [
        'ggo:101148121',
        'olu:OSTLU_15108',
        'ebt:EBL_c03070',
        'pec:W5S_4205',
        'buc:BU148',
        'smaf:D781_0330',
        'nkr:NKOR_05565',
    ]
    sample_kos = [
        ['K16534'],
        ['K11126'],
        ['K02879'],
        ['K00363'],
        ['K03101'],
        ['K06925'],
        ['K03524'],
    ]
    for gene, kos in zip(sample_genes, sample_kos):
        myAssertEq(links[gene], kos)
예제 #11
0
def testReadKeggFile(keggFile):
    """Exercise readKeggFile on ``keggFile`` and assert known entries.

    Checks one DESCRIPTION entry, PATHWAY membership and content, the
    level-2 names of two KOs (ignoring case) and the level-3 classes of
    six KOs. Also checks that requesting level ``3`` and level ``'3'``
    gives matching values for every key of the level-3 map.
    """
    ko_descriptions = readKeggFile(keggFile, 'DESCRIPTION')
    prss36 = ['PRSS36; protease, serine, 36 [EC:3.4.21.-]']
    myAssertEq(ko_descriptions['K09630'], prss36)

    ko_pathways = readKeggFile(keggFile, 'PATHWAY')
    assert 'K00397' in ko_pathways
    myAssertEq(
        ko_pathways['K00399'],
        ['01200 Carbon metabolism', '00680 Methane metabolism',
         '01000 Enzymes'],
    )

    ko_level2 = readKeggFile(keggFile, 2)
    k13810_level2 = ko_level2['K13810'][1].lower()
    myAssertEq(k13810_level2, 'Carbohydrate Metabolism'.lower())
    k13810_overview = ko_level2['K13810'][0].lower()
    myAssertEq(k13810_overview, 'Overview'.lower())
    k00399_level2 = ko_level2['K00399'][1].lower()
    myAssertEq(k00399_level2, 'Energy Metabolism'.lower())
    k00399_overview = ko_level2['K00399'][0].lower()
    myAssertEq(k00399_overview, 'Overview'.lower())

    ko_level3 = readKeggFile(keggFile, 3)
    expected_level3 = [
        ('K13810', ['01230 Biosynthesis of amino acids',
                    '00010 Glycolysis / Gluconeogenesis',
                    '00030 Pentose phosphate pathway',
                    '00500 Starch and sucrose metabolism',
                    '00520 Amino sugar and nucleotide sugar metabolism',
                    '01000 Enzymes',
                    '01000 Enzymes']),
        ('K00399', ['01200 Carbon metabolism',
                    '00680 Methane metabolism',
                    '01000 Enzymes']),
        ('K03404', ['00860 Porphyrin and chlorophyll metabolism',
                    '01000 Enzymes']),
        ('K01976', ['01000 Enzymes']),
        ('K07347', ['02000 Transporters',
                    '02044 Secretion system',
                    '02035 Bacterial motility proteins',
                    '05133 Pertussis']),
        ('K09630', ['01000 Enzymes', '01002 Peptidases']),
    ]
    for ko_id, expected_classes in expected_level3:
        myAssertEq(ko_level3[ko_id], expected_classes)

    # Same level requested as a string; values must match key by key.
    ko_level3_str = readKeggFile(keggFile, '3')
    for ko_id in ko_level3.keys():
        int_classes = ko_level3[ko_id]
        str_classes = ko_level3_str[ko_id]
        try:
            myAssertEq(int_classes, str_classes)
        except AssertionError:
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s" %
                (ko_id, int_classes, str_classes))