def ncbi(args: argparse.Namespace) -> None:
    """Run the Sequence Ontology annotation helpers over a parsed GFF.

    Reads ``args.infile`` with ``GFF.parse`` and calls ``break_bubbles()`` on
    the result, loads the ontology named by ``args.so`` through
    ``Ontology.from_obo_library``, indexes its entries by name, and hands the
    GFF plus that index to ``add_so_as_ontologies``, ``add_ncrna_types`` and
    ``add_pseudogene_types`` in that order.

    Parameters
    ----------
    args:
        Parsed command-line arguments; ``infile`` and ``so`` are read here.

    Returns
    -------
    None. Nothing in this function writes the GFF out.
    """
    records = GFF.parse(args.infile)
    gff = records.break_bubbles()

    so = Ontology.from_obo_library(args.so)

    # Index ontology entries by name; when two entries share a name the one
    # seen last wins.
    name_to_so = {}
    for entry in so.values():
        name_to_so[entry.name] = entry

    add_so_as_ontologies(gff, name_to_so)
    add_ncrna_types(gff, name_to_so, so, NCRNA_TYPES)
    add_pseudogene_types(gff, name_to_so, so, PSEUDOGENE_TYPES)
def ontology_from_obo_library(ontology_short_name: str) -> Ontology:
    """Load an ontology by its short name.

    Thin wrapper that forwards straight to ``Ontology.from_obo_library``
    (the pronto constructor) and returns whatever it produces.

    Parameters
    ==========
    ontology_short_name: str
        The short name of the ontology file, passed through unchanged
        (cl.obo for cell line, ncit.obo for NCIT, etc.)

    Return
    ======
    A pronto Ontology object
    """
    return Ontology.from_obo_library(ontology_short_name)
import pandas as pd
from pronto import Ontology

# Exploratory script: load ontologies with pronto, poke at the antibiotic
# resistance ontology (ARO), convert it to OBO, and start matching resfinder
# entries against ARO identifiers.

cl = Ontology("http://purl.obolibrary.org/obo/cl.obo")

# For ARGs: ARO, the antibiotic resistance ontology (aro.owl is also
# distributed at https://card.mcmaster.ca/download).
# NOTE(review): from_obo_library resolves the name against the OBO Library,
# not a local CARD download -- confirm which source is intended.
aro = Ontology.from_obo_library("aro.owl")

# Exploring the ontology. The bare list(...) expressions only show output
# when run interactively; as a script their results are discarded.
cf = aro['confers_resistance_to_antibiotic']
t = aro['ARO:1000001']
list(t.objects(cf))
list(t.superclasses())
list(t.subclasses())

# Change format to OBO.
with open('aro.obo', 'wb') as f:
    aro.dump(f, format='obo')

# Find terms: reload from the OBO file just written and print the id of
# every leaf term.
aro = Ontology("aro.obo")
for term in aro.terms():
    if term.is_leaf():
        print(term.id)

# Load resfinder sequences for matching.
# The first row must be read as the header: with header=None pandas labels
# the columns 0, 1, ... and the '#Aminoglycoside' lookup below raises
# KeyError.
resfinder_seq = pd.read_csv("resfinder.csv", sep=" ", header=0)
aro2seq = {}
for a in set(resfinder_seq['#Aminoglycoside']):
    # Skip entries that are not identifiers known to the ontology.
    if a not in aro:
        continue
    t = aro[a]
    # NOTE(review): aro2seq is never filled in the visible part of this
    # script -- the loop body may continue beyond what is shown.