def testTranslateAndCountHits(hits):
    """
    Check countHits() and translateHits() against a known hit map.

    ``hits`` is indexed by read name and each value is compared against a
    list of hit names (presumably the map returned by testParseHits --
    confirm against the caller).

    NOTE(review): a second definition with the same name and logic follows
    this one in the file; the later definition is the one that stays bound.
    """
    # counting: grand total plus a per-name tally
    hit_total, per_name = countHits(hits)
    myAssertEq(hit_total, 29)
    myAssertEq(per_name["Prochlorococcus"], 10)
    myAssertEq(per_name['root'], 7)

    # translation: the return value is ignored and ``hits`` itself is
    # inspected afterwards, so the entries are expected to change in place
    translation = {
        'Bacteria <prokaryote>': 'other',
        'root': 'other',
        'Candidatus Pelagibacter': 'Pelagibacter',
    }
    translateHits(hits, translation)
    myAssertEq(hits['000178_2410_1152'], ['other'])
    myAssertEq(hits['000093_2435_2228'], ['Pelagibacter'])
def testTranslateAndCountHits(hits):
    """
    Verify hit counting, then hit translation, on the supplied hit map.

    The map is looked up by read name and its values are compared with
    lists of hit names.

    NOTE(review): this name is defined twice in the file with the same
    logic; this later definition replaces the earlier one at import time.
    """
    counted = countHits(hits)
    grand_total = counted[0]
    tally = counted[1]
    myAssertEq(grand_total, 29)
    for hit_name, expected_count in (("Prochlorococcus", 10), ('root', 7)):
        myAssertEq(tally[hit_name], expected_count)

    # collapse two broad names into 'other' and shorten the Pelagibacter name
    translateHits(
        hits,
        {
            'Bacteria <prokaryote>': 'other',
            'root': 'other',
            'Candidatus Pelagibacter': 'Pelagibacter',
        },
    )

    # the translated values are read back from ``hits`` itself
    myAssertEq(hits['000178_2410_1152'], ['other'])
    myAssertEq(hits['000093_2435_2228'], ['Pelagibacter'])
def testReadKoFile(koFile):
    """
    Check readKOFile() for the "PATHWAY" and "EC" fields of ``koFile``.

    K00397 must be absent from the pathway map but present in the EC map;
    K00399 must be present in both with the values listed below.
    """
    pathway_map = readKOFile(koFile, "PATHWAY")
    assert "K00397" not in pathway_map
    expected_pathways = ["ko00680 Methane metabolism",
                         "ko01200 Carbon metabolism"]
    myAssertEq(pathway_map["K00399"], expected_pathways)

    ec_map = readKOFile(koFile, "EC")
    for ko, expected_ec in (("K00397", ["EC:1.8.99.-"]),
                            ("K00399", ["EC:2.8.4.1"])):
        myAssertEq(ec_map[ko], expected_ec)
def test_read_ncbi(ndir):
    """
    Load a taxonomy with readTaxonomy() and spot-check its two lookup maps.

    ``ndir`` is handed straight to readTaxonomy (presumably the directory
    holding the NCBI taxonomy dump -- confirm). The second argument is
    passed as True.

    Returns the id map (``taxonomy.idMap``) so callers can reuse it.

    NOTE(review): a second definition of this function follows in the file
    and shadows this one.
    """
    taxonomy = readTaxonomy(ndir, True)
    id_map = taxonomy.idMap
    name_map = taxonomy.nameMap

    # expected sizes of the two maps for the reference data set
    myAssertEq(len(id_map), 783145)
    myAssertEq(len(name_map), 1101991)

    # pick some random things to check
    myAssertEq(id_map[123456].name, 'Psammomoya choretroides')

    # both the short and the long form of the name must resolve to the
    # very same node object as the id lookup
    short_key = simplifyString('Psammomoya choretroides')
    myAssertIs(name_map[short_key], id_map[123456])
    long_key = simplifyString(
        'Psammomoya choretroides (F.Muell.) Diels & Loes.')
    myAssertIs(name_map[long_key], id_map[123456])

    myAssertEq(id_map[123499].parent.id, 50537)
    return id_map
def test_read_ncbi(ndir):
    """
    Read the taxonomy under ``ndir`` and verify a handful of known entries.

    Checks the sizes of ``idMap`` and ``nameMap``, one node name, that two
    spellings of that name map to the identical node object, and one
    parent id. Returns ``idMap``.

    NOTE(review): this is the second definition of this name in the file;
    it replaces the earlier, logically identical one.
    """
    with_names = True
    tax = readTaxonomy(ndir, with_names)
    by_id, by_name = tax.idMap, tax.nameMap

    for mapping, expected_size in ((by_id, 783145), (by_name, 1101991)):
        myAssertEq(len(mapping), expected_size)

    # pick some random things to check
    species = 'Psammomoya choretroides'
    myAssertEq(by_id[123456].name, species)
    myAssertIs(by_name[simplifyString(species)], by_id[123456])
    # same species, written with its author citation
    myAssertIs(
        by_name[simplifyString('Psammomoya choretroides '
                               '(F.Muell.) Diels & Loes.')],
        by_id[123456],
    )
    myAssertEq(by_id[123499].parent.id, 50537)

    return by_id
def testParseHits(testFile):
    """
    Exercise the per-line hit extractors and then parseHits() on a file.

    The first part feeds a fixed list of cells to _getReadHitsSimple,
    _getReadHitsSep and _getReadHitsAll and checks the read and the hits
    each one reports. The second part parses ``testFile`` and checks the
    number of reads and three known entries.

    Returns the dict of read -> hits built from ``testFile``.
    """
    # test line parsing methods
    cells = [1, 2, 3, 4, "(4,5)", "6,7"]

    # each extractor returns (read, iterator over hits); the iterator is
    # drained before the assertions, as in the original loops
    (read, hitIter) = _getReadHitsSimple(cells, 0, 2, None)
    hits = list(hitIter)
    myAssertEq(read, 1)
    myAssertEq(len(hits), 1)
    myAssertEq(hits[0], 3)

    (read, hitIter) = _getReadHitsSep(cells, 1, 5, ',')
    hits = list(hitIter)
    myAssertEq(read, 2)
    myAssertEq(hits, ['6', '7'])

    # a copy of the cells is passed here, so ``cells`` itself is untouched
    (read, hitIter) = _getReadHitsAll(list(cells), 3, -1, None)
    hits = list(hitIter)
    myAssertEq(read, 4)
    myAssertEq(len(hits), 5)
    myAssertEq(hits, [1, 2, 3, "(4,5)", "6,7"])

    # give it a test file
    # The file is opened in a ``with`` block so the handle is closed even
    # if parsing fails; the iterator is consumed inside the block in case
    # parseHits reads lazily.
    hits = {}
    with open(testFile) as hitStream:
        for r, h in parseHits(hitStream, 0, -1, True, None):
            hits[r] = h
    logging.debug(repr(hits))
    myAssertEq(len(hits), 29)
    myAssertEq(hits['000023_2435_2174'], ['Prochlorococcus'])
    myAssertEq(hits['000178_2410_1152'], ['Bacteria <prokaryote>'])
    myAssertEq(hits['000093_2435_2228'], ['Candidatus Pelagibacter'])
    return hits
def testReadKeggFile(keggFile):
    """
    Check readKeggFile() for each kind of key it is asked for here.

    The second argument selects what is returned per KO: "DESCRIPTION",
    "PATHWAY", the integer levels 2 and 3, and the string "3". The last
    must give the same result as the integer 3 for every KO.

    Raises AssertionError if any expectation fails. For the level-3
    comparison, the error names the offending KO and shows both values.
    """
    kDmap = readKeggFile(keggFile, "DESCRIPTION")
    myAssertEq(kDmap["K09630"],
               ["PRSS36; protease, serine, 36 [EC:3.4.21.-]"])

    kPmap = readKeggFile(keggFile, "PATHWAY")
    assert "K00397" in kPmap
    myAssertEq(kPmap["K00399"],
               ["01200 Carbon metabolism",
                "00680 Methane metabolism",
                "01000 Enzymes"])

    # level 2: compared case-insensitively
    k2map = readKeggFile(keggFile, 2)
    myAssertEq(k2map["K13810"][1].lower(), "Carbohydrate Metabolism".lower())
    myAssertEq(k2map["K13810"][0].lower(), "Overview".lower())
    myAssertEq(k2map["K00399"][1].lower(), "Energy Metabolism".lower())
    myAssertEq(k2map["K00399"][0].lower(), "Overview".lower())

    # level 3
    k3map = readKeggFile(keggFile, 3)
    myAssertEq(
        k3map["K13810"],
        [
            "01230 Biosynthesis of amino acids",
            "00010 Glycolysis / Gluconeogenesis",
            "00030 Pentose phosphate pathway",
            "00500 Starch and sucrose metabolism",
            "00520 Amino sugar and nucleotide sugar metabolism",
            "01000 Enzymes",
            "01000 Enzymes",
        ],
    )
    myAssertEq(k3map["K00399"],
               ["01200 Carbon metabolism",
                "00680 Methane metabolism",
                "01000 Enzymes"])
    myAssertEq(k3map["K03404"],
               ["00860 Porphyrin and chlorophyll metabolism",
                "01000 Enzymes"])
    myAssertEq(k3map["K01976"], ["01000 Enzymes"])
    myAssertEq(
        k3map["K07347"],
        ["02000 Transporters",
         "02044 Secretion system",
         "02035 Bacterial motility proteins",
         "05133 Pertussis"],
    )
    myAssertEq(k3map["K09630"], ["01000 Enzymes", "01002 Peptidases"])

    # the level given as a string must behave like the integer level
    k3mapQ = readKeggFile(keggFile, "3")
    # plain iteration yields the keys on both Python 2 and 3
    # (dict.iterkeys() exists only on Python 2)
    for k in k3map:
        try:
            myAssertEq(k3map[k], k3mapQ[k])
        except AssertionError:
            # was misspelled "AsserionError", which raised NameError
            # instead of this more informative message
            raise AssertionError(
                "level 3 classes for %s do not match:\n%s\n%s"
                % (k, k3map[k], k3mapQ[k]))
def testParseGeneKOMap(koFile):
    """
    Check parseGeneKOMap() on ``koFile`` for a fixed set of genes.

    Each gene id must map to exactly the list of KO ids given in the
    table below; genes with one KO and with several KOs are both covered.
    """
    gene_to_kos = parseGeneKOMap(koFile)

    # (gene id, expected KO list), checked in this order
    expectations = (
        ("dpe:Dper_GL25993", ["K00001"]),
        ("rpc:RPC_2974", ["K00001"]),
        ("pic:PICST_59568", ["K00100"]),
        ("bbp:BBPR_1508", ["K02755", "K02756", "K02757"]),
        ("sfv:SFV_2242", ["K02769", "K02770"]),
        ("fma:FMG_0161", ["K02982"]),
    )
    for gene, expected_kos in expectations:
        myAssertEq(gene_to_kos[gene], expected_kos)