def load_whog(self, whog_path):
    """Load the entries of a ``whog`` file into the ``cog`` ontology.

    The file content is cut into groups on the ``_______`` separator and
    only the first non-blank line of every group is used. That line is
    lowercased and split on single spaces: the first token is the parent,
    the second the term and the remaining tokens form the name.

    When the parent token has more than three characters, each character
    between its first and last one is looked up as the term ``[<char>]``;
    otherwise the parent token itself is looked up. Every parent document
    found gets the new term appended to its ``children`` and is saved.
    The new document's keywords are the union of the keywords extracted
    from the header line and the keywords of its parent(s).

    :param whog_path: path of the whog file to read.
    """
    with open(whog_path, "r") as handle:
        content = handle.read()

    for block in content.split("_______"):
        rows = [row.strip() for row in block.split("\n") if row.strip()]
        if not rows:
            # Nothing but blank lines between two separators.
            continue

        header = rows[0].lower()
        tokens = header.lower().strip().split(" ")
        parent = tokens[0]
        term = tokens[1]
        name = " ".join(tokens[2:])

        ont_doc = Ontology(term=term, name=name, parent=parent,
                           ontology="cog")
        keywords = self.ki.extract_keywords(header)

        if len(parent) <= 3:
            # Single parent: the token is used verbatim as the parent term.
            parent_doc = Ontology.objects(term=parent).get()
            parent_doc.children.append(term)
            parent_doc.save()
            keywords = list(set(parent_doc.keywords + keywords))
        else:
            # Several parents packed in one token: drop the first and last
            # character and treat every remaining character as a parent.
            for letter in parent[1:-1]:
                parent_doc = Ontology.objects(
                    term="[" + letter + "]").get()
                keywords = list(set(parent_doc.keywords + keywords))
                parent_doc.children.append(term)
                parent_doc.save()

        ont_doc.keywords = keywords
        ont_doc.save()
def load_dat(self, reactions_file, database, postfix):
    """Load the records of a ``.dat`` flat file as ontology documents.

    Lines starting with ``#`` are ignored. The remaining text is split
    into records on ``//`` followed by a newline, and every attribute line
    of a record is expected to look like ``FIELD - value``. The fields
    UNIQUE-ID, TYPES, IN-PATHWAY, COMMON-NAME, COMMENT and EC-NUMBER are
    mapped onto the document; any other field is ignored.

    A record without a UNIQUE-ID is printed and skipped; every other
    record is saved.

    :param reactions_file: path of the ``.dat`` file to read.
    :param database: value appended to the ``databases`` list of every
        document created.
    :param postfix: suffix appended to ``self.ontology_name`` to build the
        ``ontology`` value of the documents.
    """
    with open(reactions_file) as reactions_handle:
        lines = [x for x in reactions_handle if not x.startswith("#")]
    # Each line keeps its own trailing newline, so joining with "\n" yields
    # blank lines in between; they are dropped again when a record is split.
    records = re.split("//\n", "\n".join(lines))

    for record in records:
        if not record.strip():
            continue

        ont_doc = Ontology(ontology=self.ontology_name + postfix)
        ont_doc.databases.append(database)
        reaction_types = []
        ec = None

        for str_record in record.split("\n"):
            if len(str_record.strip()) <= 3:
                continue
            # Split on the FIRST " - " only, so that values which contain
            # " - " themselves (free-text comments, names) are kept whole.
            field, separator, value = str_record.partition(" - ")
            if not separator:
                continue
            field = field.strip()
            value = value.strip()
            # Only byte strings need decoding (Python 2 ``str``); text
            # strings (Python 3 ``str``) have no ``decode`` method.
            if isinstance(value, bytes):
                try:
                    value = value.decode("utf-8")
                except UnicodeDecodeError:
                    continue

            if field == "UNIQUE-ID":
                ont_doc.term = value.lower()
            elif field == "TYPES":
                reaction_types.append(value)
            elif field == "IN-PATHWAY":
                ont_doc.parents.append(value)
            elif field == "COMMON-NAME":
                ont_doc.name = value
            elif (field == "COMMENT") and (not ont_doc.name):
                ont_doc.description = value
            elif (field == "EC-NUMBER") and (not ont_doc.name):
                ec = value

        if not ont_doc.term:
            # Without a UNIQUE-ID the record cannot be stored: report it
            # and skip it before deriving a name or keywords from None.
            print(record)
            continue

        if not ont_doc.description:
            ont_doc.description = "|".join(reaction_types)
        if not ont_doc.name:
            # Fall back to the EC number, then to the term itself.
            ont_doc.name = ec if ec else ont_doc.term

        ont_doc.keywords = self.ki.extract_keywords(
            ont_doc.name) + [ont_doc.term]
        ont_doc.types = reaction_types
        if ec:
            ont_doc.keywords.append(ec)
        ont_doc.save()
def load_enzclass(self, enzclass_file_path):
    """Load the class lines of an enzyme-class file as ontology documents.

    A ``root`` document named ``ec`` is saved first, with the six terms
    ``ec:1.-.-.-`` to ``ec:6.-.-.-`` as children. Then every line of the
    file that starts with a digit from 1 to 6 followed by a dot produces
    one document: the text after the last ``.-`` is the name, and the
    line with that name and all spaces removed, prefixed with ``ec:``, is
    the term. Keywords are those extracted from the name plus the term.

    :param enzclass_file_path: path of the enzyme-class file to read.
    """
    top_level_terms = ["ec:{}.-.-.-".format(n) for n in range(1, 7)]
    Ontology(ontology=self.ontology_name, term="root", name="ec",
             children=top_level_terms).save()

    with open(enzclass_file_path) as enzclass_handle:
        for raw_line in enzclass_handle:
            if not re.match(r'^[1-6][.]', raw_line):
                # Header, footer or blank line: not a class entry.
                continue

            name = raw_line.rpartition(".-")[2].strip()
            # What is left once the name and the padding spaces are gone
            # is the class code.
            code = raw_line.replace(name, "").replace(" ", "").strip()

            ont_doc = Ontology(ontology=self.ontology_name,
                               term="ec:" + code, name=name)
            ont_doc.keywords = (self.ki.extract_keywords(ont_doc.name)
                                + [ont_doc.term])
            ont_doc.save()
def _load_mongo(self):
    """Save one ontology document per node of ``self.graph``.

    A ``root`` document is saved first, with ``self.root_terms`` as both
    its successors and its children. Every graph node is then stored with
    its name, children, successors, description, sub-classes and keywords.
    The ``database`` of a node defaults to ``biological_process`` and is
    switched to ``cellular_component`` or ``molecular_function`` when
    ``go:0005575`` or ``go:0003674`` respectively is found in the result
    of ``self.complete_subgraph([node])``; the latter wins if both match.

    :raises Exception: if the graph contains a node called ``root``.
    """
    Ontology(ontology=self.ontology_name, term="root",
             successors=self.root_terms, children=self.root_terms).save()

    for node, attrs in self.graph.nodes_iter(data=True):
        if node == "root":
            raise Exception("...")

        children = self.graph.successors(node)
        lineage = self.complete_subgraph([node])

        # The two checks stay sequential and in this order on purpose:
        # the second one overrides the first.
        database = "biological_process"
        if "go:0005575" in lineage:
            database = "cellular_component"
        if "go:0003674" in lineage:
            database = "molecular_function"

        name = attrs["name"]
        all_successors = self.all_successors(node, [])
        description = self.go_dag.query_term(node.upper()).desc
        subclasses = list({
            child.lower()
            for child in self.go_dag.query_term(
                node.upper()).get_all_children()
        })

        # NOTE: successors_relationships is not populated here; the call
        # that used to fill it is disabled.
        ont_doc = Ontology(
            ontology=self.ontology_name,
            term=node,
            name=name,
            database=database,
            successors=all_successors,
            children=children,
            description=description,
            subclases=subclasses)
        ont_doc.keywords = self.ki.extract_keywords(
            [ont_doc.description, ont_doc.name, ont_doc.term])
        ont_doc.save()