def test_classification_with_colon(self):
    # The SMCOG id and its description are stored together in a single
    # string joined by ':', so a description that itself contains ':' must
    # survive the round trip intact.
    # The test gene is AQF52_5530 from CP013129.1.
    translation = (
        "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN"
        "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL"
        "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV"
        "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT"
        "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS"
        "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM"
        "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI"
        "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA"
        "PVPVPDQRAEGGEEKARAGV"
    )
    length = len(translation)

    cds = helpers.DummyCDS(0, length)
    cds.translation = translation

    results = smcogs.classify("test", [cds], get_config())
    best_hit = results.best_hits[cds.get_name()]
    assert best_hit.hit_id == "SMCOG1212:sodium:dicarboxylate symporter"

    record = helpers.DummyRecord(seq=translation)
    record.add_cds_feature(cds)
    record.add_protocluster(helpers.DummyProtocluster(0, length))

    # This call crashes if a hit id containing multiple colons is split
    # incorrectly.
    results.add_to_record(record)

    gene_functions = cds.gene_functions.get_by_tool("smcogs")
    assert len(gene_functions) == 1
    rendered = str(gene_functions[0])
    assert rendered.startswith(
        "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
        " (Score: 416; E-value: 2.3e-126)"
    )
def test_annotations(self):
    # Classify every CDS of the fixture record, annotate the record, and
    # check that each CDS reports the function the results mapped to it.
    record = self.record
    results = smcogs.classify(record.id, record.get_cds_features(),
                              self.options)
    results.add_to_record(record)

    for cds in record.get_cds_features():
        # A CDS carrying a "rule-based-clusters" function is excluded from
        # the comparison.
        if not cds.gene_functions.get_by_tool("rule-based-clusters"):
            actual = cds.gene_function
            # A CDS with no entry in the mapping is expected to be OTHER.
            expected = results.function_mapping.get(cds.get_name(),
                                                    GeneFunction.OTHER)
            assert actual == expected
def test_results_reconstruction(self):
    # Results must survive a to_json/from_json round trip unchanged.
    expected_hit = 'SMCOG1155:Lantibiotic dehydratase domain protein'

    results = smcogs.classify(self.record.id,
                              self.record.get_cds_features(),
                              self.options)
    assert results.tool == "smcogs"
    assert results.best_hits["nisB"].hit_id == expected_hit

    serialised = results.to_json()
    # The first element of the serialised hit is compared to the hit id.
    assert serialised["best_hits"]["nisB"][0] == expected_hit

    rebuilt = core.FunctionResults.from_json(serialised, self.record)
    assert rebuilt.tool == "smcogs"
    assert rebuilt.best_hits["nisB"].hit_id == expected_hit
    # Re-serialising the rebuilt results must give back the same JSON.
    assert rebuilt.to_json() == serialised