def get_term2id(ontology: Ontology) -> typing.Tuple[typing.Dict[str, str], typing.List[str]]:
    """Collect the named terms of *ontology*.

    Returns a ``(term2id, terms)`` pair: a mapping from term name to term
    id, and the list of term names in iteration order. Terms without a
    name are skipped. If two terms share a name, the list keeps both
    occurrences while the mapping keeps the id seen last.
    """
    named = [term for term in ontology.terms() if term.name]
    name_to_id = {term.name: term.id for term in named}
    names = [term.name for term in named]
    return name_to_id, names
def convert_to_json(input="GFOP.owl", output="GFOP.json"):
    """Convert an OWL ontology file into an anytree JSON tree.

    Reads *input* with pronto, builds one ``Node`` per named term
    (linked to its first direct superclass), attaches parentless nodes
    to a synthetic "GFOP" root, prints the JSON and an ASCII rendering
    of the tree for debugging, and writes the JSON to *output*.
    """
    # synthetic root bundling everything that has no ontology parent
    root: Node = Node("GFOP")
    nodes = {}
    # read owl file and cache all nodes in a dict {name: node}
    obo = Ontology(input)
    for term in obo.terms():
        term_id = term.id  # renamed from `id` to avoid shadowing the builtin
        name = term.name
        # direct parents only: distance 1, excluding the term itself
        parent_terms = term.superclasses(with_self=False, distance=1).to_set()
        if not parent_terms:
            # no parent in the ontology — will be attached to the root below
            nodes[name] = Node(name, id=term_id)
        else:
            # NOTE: only one (arbitrary) parent is used even if several exist
            parent = parent_terms.pop()
            nodes[name] = Node(name, id=term_id, parent_id=parent.id,
                               parent_name=parent.name)
    # link all nodes to their parents
    for key, node in nodes.items():
        # BUG FIX: was `key is not root.name` — identity comparison on
        # strings is unreliable; compare values with `!=`
        if key != root.name:
            try:
                # find parent in cached nodes and set it on the node
                node.parent = nodes[node.parent_name]
            except AttributeError:
                # node has no parent_name attribute — attach to root
                node.parent = root
    # generate json string (renamed from `json` to avoid shadowing the module)
    exporter = JsonExporter(indent=2, sort_keys=True)
    json_str = exporter.export(root)
    # print json and tree for debugging
    print(json_str)
    for pre, _, node in RenderTree(root):
        print("%s%s" % (pre, node.name))
    # export to json file
    print("Writing to {}".format(output))
    with open(output, "w") as file:
        print(json_str, file=file)
def ontologyExtractor(ontName):
    '''
    Extracts raw (property, entity) RDF triples from ontology given entity name.

    Builds all (subclass, "subclass_of", class) triples, removes
    self-relations (pronto's ``subclasses()`` includes the term itself),
    de-duplicates them, keeps only triples touching "Pizza", and rewrites
    them as ("hypernym", name) / ("hyponym", name + " Pizza") pairs.
    '''
    ont = Ontology(ontName)
    triples = []
    for term in ont.terms():
        for sub in term.subclasses():
            triples.append((sub.name, "subclass_of", term.name))
    # FIX: dedupe on the tuples directly — they are hashable, so the old
    # "\t".join(...) / split("\t") round-trip (which breaks on names
    # containing tabs) is unnecessary. Self-relations are dropped here too.
    unique = {t for t in triples if t[0] != t[-1]}
    # keep only triples that mention "Pizza" on either end
    pizza = [t for t in unique if t[0] == "Pizza" or t[-1] == "Pizza"]
    # "Pizza" as the superclass -> the other end is a hypernym of it;
    # otherwise the subclass end is a hyponym
    return [("hypernym", t[-1]) if t[0] == "Pizza"
            else ("hyponym", t[0] + " Pizza")
            for t in pizza]
aro = Ontology.from_obo_library("aro.owl") #exploring ontology cf = aro['confers_resistance_to_antibiotic'] t = aro['ARO:1000001'] list(t.objects(cf)) list(t.superclasses()) list(t.subclasses()) #change format to obo with open('aro.obo', 'wb') as f: aro.dump(f, format='obo') #find terms aro = Ontology("aro.obo") for term in aro.terms(): if term.is_leaf(): print(term.id) #load resfinder sequences for matching import pandas as pd resfinder_seq = pd.read_csv("resfinder.csv", sep=" ", header=None) aro2seq = {} for a in set(resfinder_seq['#Aminoglycoside']): if a not in aro: continue t = aro[a] cf = aro['name.lower()'] cur = list(t.subclasses(cf)) if cur: aro2seq[a] = ';'.join([c.name for c in cur])
class ChebiOntologySubsetter:
    """Extract a subset of ChEBI restricted to the classes used in an
    experiment, for visualising a per-class score (e.g. F1) as a colour
    on a network plot."""

    def __init__(self):
        # populated by createSubsetFor():
        self.chebislim = None   # pronto Ontology of the extracted slice
        self.classes_in = None  # set of ChEBI IDs included in the experiment

    def createSubsetFor(self, classes_in):
        """Extract the MIREOT slice of chebi.obo rooted at *classes_in*.

        Writes the class list to classes_in.txt, invokes ROBOT's
        ``extract --method MIREOT`` on a local chebi.obo, and loads the
        resulting chebi-slim.obo into ``self.chebislim``.
        """
        self.classes_in = set(classes_in)
        print(len(classes_in))
        # ROBOT reads the lower-term list from a file, one ID per line
        with open("classes_in.txt", 'w') as outfile:
            for c in self.classes_in:
                outfile.writelines(c + "\n")
        # NOTE(review): hard-coded local path to the robot executable
        rw = robot_wrapper.RobotWrapper(
            robotcmd='/Users/hastingj/Work/Onto/robot/robot')
        extract_cmd = [
            rw.robotcmd,
            "extract --method MIREOT ",
            "--input chebi.obo",
            "--lower-terms classes_in.txt",
            "--intermediates minimal",
            "--output chebi-slim.obo"
        ]
        rw.__executeCommand__(" ".join(extract_cmd))
        self.chebislim = Ontology("chebi-slim.obo")

    # For the ontology subset, print a network image for those classes,
    # including colours for some numeric value assigned to classes.
    # colour_nums = dictionary of chebi IDs to some number between 0 and 1
    # num_cols = how many colours to generate from colour_start to colour_end
    def printImageOfSubset(self,
                           image_name,
                           colour_nums,
                           num_cols=100,
                           colour_start="red",
                           colour_end="green"):
        """Render ``self.chebislim`` as a PNG at *image_name*.

        Nodes for classes in ``self.classes_in`` are coloured along the
        colour_start..colour_end gradient according to colour_nums
        (values in [0, 1]); other nodes are grey. Edges point from each
        class to its 'is a' parents.
        """
        start_colour = Color(colour_start)
        colors = list(start_colour.range_to(Color(colour_end), num_cols))
        G = nx.DiGraph()
        for term in self.chebislim.terms():
            chebi_id = term.id
            chebi_name = term.name
            # direct 'is a' parents of this term
            parents = set([
                t.id for rel in term.relationships
                for t in term.relationships[rel] if rel.name == 'is a'
            ])
            definition = term.definition
            color = 'grey'
            if chebi_id in self.classes_in:
                cnum = colour_nums[chebi_id]
                # BUG FIX: the old index int(num_cols * cnum) - 1 maps
                # cnum ~ 0 to -1, i.e. the *last* colour (colour_end)
                # instead of the first; clamp into [0, num_cols - 1].
                idx = min(num_cols - 1, max(0, int(num_cols * cnum) - 1))
                color = colors[idx]
            G.add_node(chebi_name, color=color)
            for p in parents:
                G.add_edge(chebi_name, self.chebislim[p].name)
        pdot = nx.drawing.nx_pydot.to_pydot(G)
        for i, node in enumerate(pdot.get_nodes()):
            node.set_shape('box')
            node.set_fontcolor('black')
            node.set_style('rounded, filled')
        png_path = image_name
        # NOTE(review): hard-coded path to graphviz's dot binary
        pdot.write_png(png_path, prog='/usr/local/bin/dot')