def obo(path: str, check: bool):
    """Export CONSO as OBO."""
    get_obo().write_obo(path)
    if check:
        import obonet

        obonet.read_obo(path)
def get_ontology(obo_link='../ontologies/go-basic.obo', reverse_graph="false"):
    try:
        graph = obonet.read_obo(obo_link)
    except Exception as es:
        logging.info(es)
        graph = obonet.read_obo(obo_link)
    # obo_link = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
    logging.info(obo_link)
    numberOfNodes = graph.number_of_nodes()
    logging.info("Number of nodes: {}".format(numberOfNodes))
    reverseGraph = nx.DiGraph()

    ## generate whole graph first, we'll specialize later.
    wholeset = set()
    for edge in list(graph.edges()):
        edge_info = set(graph.get_edge_data(edge[0], edge[1]).keys())
        wholeset = wholeset.union(edge_info)
        for itype in edge_info:
            if itype == "is_a" or itype == "part_of":
                if reverse_graph == "true":
                    reverseGraph.add_edge(edge[1], edge[0], type=itype)
                else:
                    reverseGraph.add_edge(edge[0], edge[1], type=itype)
    logging.info(nx.info(reverseGraph))
    tnum = len(wholeset)
    logging.info("Found {} unique edge types, {}".format(tnum, " | ".join(wholeset)))
    return reverseGraph
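# A minimal usage sketch for get_ontology, assuming a local go-basic.obo copy
# and that obonet, networkx (as nx), and logging are imported at module level:
go_dag = get_ontology(obo_link="go-basic.obo", reverse_graph="true")
# with reverse_graph="true" the edges run parent -> child, so the successors
# of a term are its subterms
print(len(list(go_dag.successors("GO:0008150"))), "direct children of biological_process")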
def textualize_top_k_terms(json_data, mapping, obo_link, class_names, k_number=5):
    """
    This method prints the names of the *k_number* most important terms
    for each class (according to genQ).
    """
    try:
        graph = obonet.read_obo(obo_link)
    except Exception as es:
        logging.info(es)
        graph = obonet.read_obo(obo_link)
    id_to_name = {id_: data.get('name') for id_, data in graph.nodes(data=True)}

    ## go through mapping
    mc = {}
    all_terms = set()
    mappings = read_generic_gaf(mapping)
    for k, v in mappings.items():
        for el in v:
            all_terms.add(el)
            if el in mc:
                mc[el] += 1
            else:
                mc[el] = 1
    normalization = len(all_terms)
    counter = 0
    for keyClass in json_data["resulting_generalization"].keys():
        first = True
        print()
        if keyClass != "average_depth" and keyClass != "average_association":
            genQ_dict = {}
            for term in json_data["resulting_generalization"][keyClass]["terms"]:
                IC = IC_of_a_term(term, mappings, mc, normalization)
                genQ = 1 - IC / 9.82
                genQ_dict[term] = genQ
            for n in range(k_number):
                max_genQ = 0
                term = ""
                for k, v in genQ_dict.items():
                    if v >= max_genQ:
                        max_genQ = v
                        term = k
                if first:
                    print("Class " + str(keyClass) + " :− " + str(id_to_name[term]))
                    first = False
                else:
                    print("^" + str(id_to_name[term]))
                genQ_dict[term] = -1
                counter += 1
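# The top-k selection above rescans genQ_dict once per pick; a hedged,
# equivalent alternative (up to tie order) using only the standard library:
import heapq

def top_k_by_genQ(genQ_dict, k_number=5):
    """Return the k_number terms with the highest genQ scores."""
    return [t for t, _ in heapq.nlargest(k_number, genQ_dict.items(), key=lambda kv: kv[1])]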
def load_disease_file(disease_download_file, disease_output_file, disease_xref_output_file):
    # parse ontology
    ont = obonet.read_obo(disease_download_file)

    # build child node lookup dictionary
    child_dict = build_child_dict(ont)

    # build filter
    # do_filter = get_infectious_diseases(ont, child_dict)
    do_filter = None

    xref_list = []
    with open(disease_output_file, "w", newline='') as outfile:
        writer = csv.writer(outfile, delimiter="\t")
        writer.writerow(["doid", "name", "definition", "parents", "link", "source", "license"])
        for id_, data in ont.nodes(data=True):
            if do_filter is None or id_ in do_filter:
                row = parse_ontology_entry(id_, data)
                writer.writerow(row)
                parse_ontology_xref(id_, data, xref_list)

    xref_df = pd.DataFrame(xref_list)
    xref_df.to_csv(disease_xref_output_file, sep="\t", index=False)
def return_archived_ontology(version):
    '''
    This function returns an archived ontology based on the version number.
    '''
    url = 'ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel' + version + '/ontology/chebi.obo'
    graph = obonet.read_obo(url)
    return graph
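# Usage sketch; assumes the EBI FTP archive is reachable and that the release
# number ("142" here) is purely illustrative:
chebi_graph = return_archived_ontology("142")
print(chebi_graph.number_of_nodes(), "terms in ChEBI release 142")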
def update_resource(directory, url, prefix, *args, remove_prefix=False, allowed_synonyms=None):
    """Write the OBO information to files in the given directory."""
    resource_path = _make_resource_path(directory, prefix)
    obo_path = os.path.join(directory, '%s.obo.pkl' % prefix)
    if os.path.exists(obo_path):
        with open(obo_path, 'rb') as file:
            g = pickle.load(file)
    else:
        g = obonet.read_obo(url)
        with open(obo_path, 'wb') as file:
            pickle.dump(g, file)
    entries = OboClient.entries_from_graph(
        g, prefix=prefix, remove_prefix=remove_prefix,
        allowed_synonyms=allowed_synonyms)
    entries = prune_empty_entries(
        entries, {'synonyms', 'xrefs', 'alt_ids', 'relations'})
    with open(resource_path, 'w') as file:
        json.dump(entries, file, indent=1, sort_keys=True)
def _propagate_GO(gene_mapper_file, tree, out_file):
    """
    Script for propagation of GO terms.

    Input files required:
    1) go graph
    2) EggNOG GO predictions
    3) Name of output file

    Output:
    1) propagated GO terms
    """
    with open(tree, 'r') as f:
        go_graph = obonet.read_obo(f)

    # load gene mapper table
    mapped_genes = pd.read_csv(gene_mapper_file, sep="\t")
    # drop NAN in the Gene ID column
    mapped_genes = mapped_genes[mapped_genes['Gene ID'].notna()]
    # subset data and drop NANs in the GOs column
    genes_GO_df = mapped_genes[["Gene ID", "GOs"]].dropna()
    # propagate GO terms
    genes_GO_df['GOs_propagated'] = genes_GO_df['GOs'].str.split(',').\
        apply(propagate_go, go_graph=go_graph)
    # save the file
    genes_GO_df.to_csv(out_file, sep='\t', index=False)
def __init__(self, path):
    """
    Initialize an ontology class by providing the path.

    :param path:
    """
    self.graph = read_obo(path)
    self.root_id = self._find_root_id()
def load_chebi(path="ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.obo"):
    print("loading chebi from {}...".format(path))
    # graph = obonet.read_obo("data/chebi.obo")
    graph = obonet.read_obo(path)
    graph.add_node(root_concept, name="ROOT")
    graph.add_edge(chemical_entity, root_concept, edgetype='is_a')
    graph.add_edge(role, root_concept, edgetype='is_a')
    graph.add_edge(subatomic_particle, root_concept, edgetype='is_a')
    graph.add_edge(application, root_concept, edgetype='is_a')
    # print([dir(d) for u, v, d in graph.edges(data=True)])
    # sys.exit()
    graph = graph.to_directed()
    is_a_graph = networkx.MultiDiGraph(
        [(u, v, d) for u, v, d in graph.edges(data=True) if d['edgetype'] == "is_a"])
    # print(networkx.is_directed_acyclic_graph(is_a_graph))
    id_to_name = {id_: data['name'] for id_, data in graph.nodes(data=True)}
    name_to_id = {data['name']: id_ for id_, data in graph.nodes(data=True)}
    id_to_index = {e: i + 1 for i, e in enumerate(graph.nodes())}  # ids should start on 1 and not 0
    id_to_index[""] = 0
    synonym_to_id = {}
    print("synonyms to ids...")
    for n in graph.nodes(data=True):
        # print(n[1].get("synonym"))
        for syn in n[1].get("synonym", []):
            syn_name = syn.split('"')
            if len(syn_name) > 2:
                syn_name = syn.split('"')[1]
                synonym_to_id.setdefault(syn_name, []).append(n[0])
            # else:
            #     print("not a synonym:", syn.split('"'))
    # print(synonym_to_id)
    print("done.", len(name_to_id), "ids", len(synonym_to_id), "synonyms")
    return is_a_graph, name_to_id, synonym_to_id, id_to_name, id_to_index
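# Usage sketch for the returned mappings; assumes the module-level constants
# (root_concept, chemical_entity, role, subatomic_particle, application) are
# defined and that the ChEBI FTP URL is reachable:
is_a_graph, name_to_id, synonym_to_id, id_to_name, id_to_index = load_chebi()
water_id = name_to_id.get("water")  # expected to resolve to CHEBI:15377
print(water_id, synonym_to_id.get("H2O"))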
def parse_hpo_obo(path):
    """Parse HPO obo file.

    return a networkx MultiDiGraph object
    """
    import obonet
    graph = obonet.read_obo(path)
    return graph
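# obonet graphs point edges from subterm to superterm, so the superterms of a
# phenotype are reachable with networkx.descendants. A usage sketch, assuming
# a local hp.obo copy:
import networkx as nx

hpo = parse_hpo_obo("hp.obo")
superterms = nx.descendants(hpo, "HP:0001250")  # all ancestors of "Seizure"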
def scHCL_adata(adata, verbose=False, n_cores=1, n_min=10):
    """
    previous main function
    """
    ref_df = reference_hcl.load_HCL_reference()
    transformed_adata = process_adata(adata, ref_df)
    scHCL_df, scHCL_df_extended_Celltypes = call_celltypes(
        transformed_adata, ref_df, n_cores)
    scHCL_df = annotate_refined(scHCL_df, n_min)

    # cell ontology
    graph = obonet.read_obo('http://purl.obolibrary.org/obo/cl/cl-basic.obo')
    id_to_name = {
        id_: data.get('name')
        for id_, data in graph.nodes(data=True)
    }
    scHCL_df['CLid'] = scHCL_df['hcl_refined'].apply(
        lambda x: reference_hcl.refined_celltypes_to_cell_ontology[x]
        if x in reference_hcl.refined_celltypes_to_cell_ontology else 'unknown')
    scHCL_df['CL_name'] = scHCL_df['CLid'].apply(
        lambda x: id_to_name[x] if x in id_to_name else 'unknown')
    return scHCL_df, scHCL_df_extended_Celltypes
def test_read_obo(ontology):
    """
    Test that reading ontology does not error.
    """
    url = 'http://purl.obolibrary.org/obo/{}.obo'.format(ontology)
    graph = obonet.read_obo(url)
    assert graph
def load(obo_file, logger=None):
    """
    Load OBO file into a networkx graph.

    :param obo_file: OBO definition file.
    :param logger: Python `logging` logger instance.
    :return: `networkx.MultiDiGraph`
    """
    try:
        hpo_network = obonet.read_obo(obo_file)
        # return nx.MultiDiGraph(hpo_network.subgraph(
        #     ['HP:0000118'] + list(nx.ancestors(hpo_network, 'HP:0000118'))))
    except (FileNotFoundError, PermissionError) as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        exit(1)

    # roots for non-phenotype nodes
    non_phenotypes = {
        'mortality_aging': 'HP:0040006',
        'mode_of_inheritance': 'HP:0000005',
        'clinical_modifier': 'HP:0012823',
        'frequency': 'HP:0040279',
        'clinical_course': 'HP:0031797',
    }

    # remove non-phenotype branches
    for _, hpo_id in non_phenotypes.items():
        if hpo_id in hpo_network.nodes:
            children = nx.ancestors(hpo_network, hpo_id)
            hpo_network.remove_nodes_from([hpo_id] + list(children))

    return hpo_network
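# Usage sketch (assumes a local hp.obo and the module-level imports the
# function relies on: obonet, networkx as nx, and sys):
hpo_network = load("hp.obo")
# the non-phenotype branches listed above are pruned, e.g. mode of inheritance
assert "HP:0000005" not in hpo_network.nodes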
def handle(self, file: str, verbosity: int = 1, **options):
    """Execute the main function."""
    try:
        FileValidator().validate(file)
    except ImportingError as e:
        raise CommandError(e)

    # Load the ontology file
    with open(file) as obo_file:
        G = obonet.read_obo(obo_file)

    if verbosity > 0:
        self.stdout.write("Preprocessing")

    cv_name = "relationship"

    # Initializing ontology
    ontology = OntologyLoader(cv_name)

    # Load typedefs as Dbxrefs and Cvterm
    if verbosity > 0:
        self.stdout.write("Loading typedefs")

    for data in tqdm(G.graph["typedefs"], disable=False if verbosity > 0 else True):
        ontology.store_type_def(data)

    if verbosity > 0:
        self.stdout.write(self.style.SUCCESS("Done"))
def __init__(self, obo_path=OBOURL):
    """
    Loads and interprets a PSI-MS obo file into a python-interpretable format.

    :param obo_path: file path or url to an obo file
    """
    CVParameterSet.__init__(self)
    try:
        self.obo_file = obonet.read_obo(obo_path)  # read the obo file
    except (FileNotFoundError, urllib.error.HTTPError):
        raise FileNotFoundError(
            f'An obo file could not be found at the provided path or URL: {obo_path}'
        )
    if obo_path == OBOURL:  # remind the user to cite the publication
        print(
            'Data was read from PSI-MS, please cite DOI: 10.1093/database/bat009'
        )
    self.format_version = self.obo_file.graph['format-version']
    self.data_version = self.obo_file.graph['data-version']
    for acc in self.obo_file:
        dct = self.obo_file.nodes[acc]
        if 'def' in dct:  # if the invalid key def is in the dictionary, convert and remove
            dct['definition'] = dct['def']
            del dct['def']
        self.cv_values[acc] = CVParam(
            id=acc,
            **dct,
        )
def test_store_type_def(self):
    """Tests - store type_def."""
    directory = os.path.dirname(os.path.abspath(__file__))
    file = os.path.join(directory, "data", "so_fake.obo")
    with open(file) as obo_file:
        G = obonet.read_obo(obo_file)

    cv_name = G.graph["default-namespace"][0]
    cv_definition = G.graph["data-version"]

    # Initializing ontology
    ontology = OntologyLoader(cv_name, cv_definition)
    for typedef in G.graph["typedefs"]:
        ontology.store_type_def(typedef)

    # Testing cv
    test_cv = Cv.objects.get(name="sequence")
    self.assertEqual("sequence", test_cv.name)
    self.assertEqual("so.obo(fake)", test_cv.definition)

    # Testing store_type_def
    test_db = Db.objects.get(name="_global")
    self.assertEqual("_global", test_db.name)
    test_dbxref = Dbxref.objects.get(db=test_db, accession="derives_from")
    self.assertEqual("derives_from", test_dbxref.accession)
    test_cvterm = Cvterm.objects.get(dbxref=test_dbxref)
    self.assertEqual("derives_from", test_cvterm.name)
    self.assertEqual(
        '"testing def loading." [PMID:999090909]', test_cvterm.definition
    )

    test_type = Cvterm.objects.get(name="comment")
    test_comment = Cvtermprop.objects.get(
        cvterm_id=test_cvterm.cvterm_id, type_id=test_type.cvterm_id
    )
    self.assertEqual("Fake typedef data.", test_comment.value)

    test_type = Cvterm.objects.get(name="is_class_level")
    test_prop = Cvtermprop.objects.get(
        cvterm_id=test_cvterm.cvterm_id, type_id=test_type.cvterm_id
    )
    self.assertEqual("1", test_prop.value)

    test_type = Cvterm.objects.get(name="is_metadata_tag")
    test_prop = Cvtermprop.objects.get(
        cvterm_id=test_cvterm.cvterm_id, type_id=test_type.cvterm_id
    )
    self.assertEqual("1", test_prop.value)

    test_type = Cvterm.objects.get(name="is_symmetric")
    test_prop = Cvtermprop.objects.get(
        cvterm_id=test_cvterm.cvterm_id, type_id=test_type.cvterm_id
    )
    self.assertEqual("1", test_prop.value)

    test_type = Cvterm.objects.get(name="is_transitive")
    test_prop = Cvtermprop.objects.get(
        cvterm_id=test_cvterm.cvterm_id, type_id=test_type.cvterm_id
    )
    self.assertEqual("1", test_prop.value)

    test_dbxref = Dbxref.objects.get(accession="0123")
    test_cvterm_dbxref = CvtermDbxref.objects.get(
        cvterm=test_cvterm, dbxref=test_dbxref
    )
    self.assertEqual(0, test_cvterm_dbxref.is_for_definition)
def run(self):
    self.node_header.extend(["comments", "description"])
    hpo_node_type = "biolink:PhenotypicFeature"
    hpo_edge_label = "rdfs:subClassOf"
    hpo_ro_relation = "RO:0002351"
    hpo_obo_file = os.path.join(self.input_base_dir, "hp.obo")

    # transform data, something like:
    with open(self.output_node_file, 'w') as node, \
            open(self.output_edge_file, 'w') as edge:

        # write headers (change default node/edge headers if necessary)
        node.write("\t".join(self.node_header) + "\n")
        edge.write("\t".join(self.edge_header) + "\n")

        graph = obonet.read_obo(hpo_obo_file)

        for id_, data in graph.nodes(data=True):
            # Write HPO nodes
            self.write_hpo_node(node, id_, data, hpo_node_type)

            # if we see is_a relationship(s), write parent-child edge(s)
            if 'is_a' in data:
                for parent in data['is_a']:
                    self.write_hpo_edge(edge, id_, hpo_edge_label, parent, hpo_ro_relation)
def _sample_graph(prefix):
    from obonet import read_obo
    from urllib.error import HTTPError

    url = f'http://purl.obolibrary.org/obo/{prefix}.obo'
    try:
        graph = read_obo(url)
    except HTTPError:
        print(f'{prefix} URL invalid {url}. See: http://www.obofoundry.org/ontology/{prefix}')
        return False
    except ValueError:
        print(f'Issue parsing {url}. See: http://www.obofoundry.org/ontology/{prefix}')
        return False

    nodes = (node for node in graph if node.lower().startswith(prefix))
    nodes = [node for node, _ in zip(nodes, range(10))]
    if not nodes:
        print(f'No own terms in {prefix}')
    for node in nodes:
        print(' example', node)

    if all(len(nodes[0]) == len(node) for node in nodes[1:]):
        return len(nodes[0]) - 1 - len(prefix)
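# Usage sketch: the helper prints ten sample terms for a prefix and returns
# the digit width of its identifiers when they agree (e.g. 7 for GO, whose
# terms look like GO:0008150). Assumes network access to the OBO PURL:
width = _sample_graph("go")
print("GO identifier width:", width)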
def load_chebi():
    """Load ChEBI ontology from local file 'chebi.obo' or from online source.

    Ensures:
        ontology_graph: is a MultiDiGraph object from Networkx representing the ChEBI ontology;
        name_to_id: is dict with mappings between each ontology concept name and the respective ChEBI id;
        synonym_to_id: is dict with mappings between each ontology concept synonym and the respective ChEBI id;
    """
    print("Loading ChEBI ontology...")
    graph = obonet.read_obo("chebi.obo")  # Load the ontology from local file

    # Add root concept to the graph
    root_concept = "CHEBI:00000"
    graph.add_node(root_concept, name="ROOT")
    graph = graph.to_directed()

    # Create mappings
    name_to_id, synonym_to_id, edge_list = {}, {}, []
    for node in graph.nodes(data=True):
        node_id, node_name = node[0], node[1]["name"]
        name_to_id[node_name] = node_id

        if 'is_a' in node[1].keys():  # The root node of the ontology does not have is_a relationships
            for related_node in node[1]['is_a']:  # Build the edge_list with only "is-a" relationships
                relationship = (node[0], related_node)
                edge_list.append(relationship)

        if "synonym" in node[1].keys():  # Check for synonyms for node (if they exist)
            for synonym in node[1]["synonym"]:
                synonym_name = synonym.split("\"")[1]
                synonym_to_id[synonym_name] = node_id

    # Create a MultiDiGraph object with only "is-a" relations - this will allow
    # the later calculation of shortest path length
    ontology_graph = nx.MultiDiGraph([edge for edge in edge_list])

    # Add edges between the ontology root and sub-ontology roots
    chemical_entity = "CHEBI:24431"
    role = "CHEBI:50906"
    subatomic_particle = "CHEBI:36342"
    application = "CHEBI:33232"
    ontology_graph.add_node(root_concept, name="ROOT")
    ontology_graph.add_edge(chemical_entity, root_concept, edgetype='is_a')
    ontology_graph.add_edge(role, root_concept, edgetype='is_a')
    ontology_graph.add_edge(subatomic_particle, root_concept, edgetype='is_a')
    ontology_graph.add_edge(application, root_concept, edgetype='is_a')

    print("Is ontology_graph acyclic:", nx.is_directed_acyclic_graph(ontology_graph))
    print("ChEBI loading complete")
    return ontology_graph, name_to_id, synonym_to_id
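# Usage sketch; assumes a local chebi.obo and that the named term exists in
# this release. Edges run from term to parent, so path lengths toward the
# root are defined:
ontology_graph, name_to_id, synonym_to_id = load_chebi()
glucose_id = name_to_id["D-glucose"]
hops = nx.shortest_path_length(ontology_graph, glucose_id, "CHEBI:00000")
print("is-a hops from D-glucose to ROOT:", hops)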
def test_store_relationship(self):
    """Tests - store relationship."""
    directory = os.path.dirname(os.path.abspath(__file__))
    file = os.path.join(directory, "data", "so_fake.obo")
    with open(file) as obo_file:
        G = obonet.read_obo(obo_file)

    cv_name = G.graph["default-namespace"][0]
    cv_definition = G.graph["data-version"]

    # Initializing ontology
    ontology = OntologyLoader(cv_name, cv_definition)
    for typedef in G.graph["typedefs"]:
        ontology.store_type_def(typedef)
    for n, data in G.nodes(data=True):
        ontology.store_term(n, data)
    for u, v, type in G.edges(keys=True):
        ontology.store_relationship(u, v, type)

    # Testing store_term
    test_subject_dbxref = Dbxref.objects.get(accession="0000013")
    test_subject_cvterm = Cvterm.objects.get(dbxref=test_subject_dbxref)
    self.assertEqual("scRNA", test_subject_cvterm.name)
    test_object_dbxref = Dbxref.objects.get(accession="0000012")
    test_object_cvterm = Cvterm.objects.get(dbxref=test_object_dbxref)
    self.assertEqual("scRNA_primary_transcript", test_object_cvterm.name)

    test_type = CvtermRelationship.objects.get(
        subject=test_subject_cvterm, object=test_object_cvterm
    )
    test_type_cvterm = Cvterm.objects.get(cvterm_id=test_type.type_id)
    self.assertEqual("derives_from", test_type_cvterm.name)
def main():
    """
    Main block
    """
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--obo', help='Path to HPO .obo file. [default: ' +
                        'http://purl.obolibrary.org/obo/hp.obo]',
                        default='http://purl.obolibrary.org/obo/hp.obo',
                        metavar='(file|url)')
    parser.add_argument('-o', '--outfile', help='Path to outfile. ' +
                        '[default: stdout]', metavar='file')
    args = parser.parse_args()

    # Open connection to obo
    hpo_g = obonet.read_obo(args.obo)

    # Open connection to outfile
    if args.outfile is None:
        outfile = stdout
    else:
        outfile = open(args.outfile, 'w')

    # Add header to outfile
    outfile.write('#HPO\tdescription\n')

    # Convert obo to tsv
    obo2tsv(hpo_g, outfile)
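# obo2tsv is called above but not shown; a minimal sketch consistent with the
# '#HPO\tdescription' header it must follow (the real helper belongs to the
# original module, so this body is an assumption):
def obo2tsv(hpo_g, outfile):
    for term_id, data in hpo_g.nodes(data=True):
        outfile.write('{}\t{}\n'.format(term_id, data.get('name', '')))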
def get_obo_graph(dir, restrict_go_nodes_list):
    obo_graph = obonet.read_obo(dir)

    # map alternative GO ids to their canonical id
    alt_ids_dic = {}
    for idx, node in obo_graph.nodes(data=True):
        if 'alt_id' in node:
            if type(node['alt_id']) == list:
                for alt_id in node['alt_id']:
                    alt_ids_dic[alt_id] = idx
            else:
                alt_ids_dic[node['alt_id']] = idx

    curated_go_nodes_list = set()  # stays empty when no restriction list is given
    if restrict_go_nodes_list:
        go_nodes_list = import_go_nodes_list()
        for node in go_nodes_list:
            if node in alt_ids_dic:
                curated_go_nodes_list.add(alt_ids_dic[node])
            elif node in obo_graph.nodes():
                curated_go_nodes_list.add(node)

    # TAKES TOO LONG; BETTER WORK WITH WHOLE GRAPH
    # Refactor graph to reconnect nodes linked to deleted nodes.
    # nodes_to_remove = []
    # for node in obo_graph.nodes().keys():
    #     if node not in curated_go_nodes_list \
    #             and len(obo_graph._pred[node]) > 0 and len(obo_graph._succ[node]) > 0:
    #         for incoming_node in obo_graph._pred[node]:
    #             for outgoing_node, value in obo_graph._succ[node].items():
    #                 obo_graph.add_edge(incoming_node, outgoing_node, key=list(value)[0])
    #         nodes_to_remove.append(node)
    # obo_graph.remove_nodes_from(curated_go_nodes_list)

    return obo_graph, alt_ids_dic, curated_go_nodes_list
def __init__(self, obo_file, hpo2gene, disease2hpo):
    self._obofile = obo_file
    self._disease_df = pd.read_csv(
        disease2hpo,
        sep="\t",
        usecols=[0, 1, 2, 4],
        names=['source', 'disease_id', 'disease', 'HPO_id'],
        comment="#")
    self._gene_df = pd.read_csv(
        hpo2gene,
        sep="\t",
        usecols=[0, 3],
        names=['HPO_id', 'gene'],
        comment="#")
    self._HPO2gene = self._gene_df.groupby(
        by='HPO_id')['gene'].apply(list).reset_index(name='genes')
    self._disease2genes = self._disease_df.merge(self._HPO2gene, on='HPO_id')
    self._disease2genes = self._disease2genes.groupby(
        by=['source', 'disease_id'])['genes'].agg(
            genes=pd.NamedAgg(column='genes', aggfunc='sum'))
    self._disease_df.set_index(['source', 'disease_id'], inplace=True)
    self._gene_df.set_index('gene', inplace=True)
    self._HPO2gene.set_index('HPO_id', inplace=True)

    # note: a stock obonet.read_obo returns a single graph; the two-value
    # unpacking below expects a variant that also returns the obsolete terms
    self._ontology, self._obsoletes = obonet.read_obo(obo_file)
    self.id_to_name = {
        id_: data.get('name')
        for id_, data in self._ontology.nodes(data=True)
    }
    self.name_to_id = {
        data['name']: id_
        for id_, data in self._ontology.nodes(data=True) if 'name' in data
    }
    self.n_terms = len(self._ontology)
def _get_obo_via_obonet(prefix: str, *, url: Optional[str] = None, local: bool = False) -> Obo:
    """Get the OBO file by prefix or URL."""
    if url is None:
        path = _ensure_obo_path(prefix)
    elif local:
        path = url
    else:
        path = get_prefix_obo_path(prefix)
        if not os.path.exists(path):
            logger.info('[%s] downloading OBO from %s to %s', prefix, url, path)
            urlretrieve(url, path)

    logger.info('[%s] parsing with obonet from %s', prefix, path)
    with open(path) as file:
        graph = obonet.read_obo(
            tqdm(file, unit_scale=True, desc=f'[{prefix}] parsing obo'))

    if 'ontology' not in graph.graph:
        logger.warning('[%s] missing "ontology" key', prefix)
        graph.graph['ontology'] = prefix
    elif not graph.graph['ontology'].isalpha():
        logger.warning(
            '[%s] ontology=%s has a strange format. replacing with prefix',
            prefix, graph.graph['ontology'])
        graph.graph['ontology'] = prefix

    return Obo.from_obonet(graph)
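# Design note: obonet.read_obo accepts a path, URL, or any iterable of lines,
# which is why wrapping the open file handle in tqdm above parses correctly
# and doubles as a line-by-line progress bar.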
def load_network(self, file_resources):
    for file in file_resources:
        if ".obo" in file:
            network = obonet.read_obo(file_resources[file])
            # network = network.reverse(copy=True)
            node_list = np.array(network.nodes)
    return network, node_list
def __init__(
        self,
        url='https://raw.githubusercontent.com/The-Sequence-Ontology/SO-Ontologies/master/so.obo'):
    self.url = url
    self.graph = obonet.read_obo(url)
    assert networkx.is_directed_acyclic_graph(self.graph)
    self.ancestor_cache = dict()
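# Usage sketch; the surrounding class is not shown above, so SequenceOntology
# is a hypothetical stand-in for it. Assumes network access to the raw so.obo
# URL and that networkx is imported:
so = SequenceOntology()
# obonet edges run child -> parent, so descendants(...) walks to superterms
parents = networkx.descendants(so.graph, "SO:0000013")  # superterms of scRNA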
def test_read_taxrank_path(extension):
    """
    Test reading the taxrank ontology OBO file from paths.
    Includes reading compressed paths.
    """
    path = os.path.join(directory, 'data', 'taxrank.obo' + extension)
    taxrank = obonet.read_obo(path)
    assert len(taxrank) == 61
def test_read_brenda_subset():
    """
    Test reading a subset of the BrendaTissue.obo file.
    This file does not set the ontology tag. See
    https://github.com/dhimmel/obonet/issues/10.
    """
    path = os.path.join(directory, 'data', 'brenda-subset.obo')
    brenda = obonet.read_obo(path)
    assert len(brenda) == 1
    assert brenda.graph['name'] is None
def test_read_taxrank_url(extension):
    """
    Test reading the taxrank ontology OBO file from URLs.
    Includes reading compressed URLs.
    """
    url = 'https://github.com/dhimmel/obonet/raw/master/tests/data/taxrank.obo'
    url += extension
    taxrank = obonet.read_obo(url)
    assert len(taxrank) == 61
def test_presence_of_obsolete_nodes():
    """Test that we did, indeed, capture those obsolete entries"""
    pytest.importorskip("networkx", minversion="2.0")
    path = os.path.join(directory, 'data', 'brenda-subset.obo')
    brenda = obonet.read_obo(path, ignore_obsolete=False)
    nodes = brenda.nodes(data=True)
    assert "BTO:0000311" in nodes
    node = nodes['BTO:0000311']
    assert node['is_obsolete'] == 'true'
def test_read_taxrank_file():
    """
    Test reading the taxrank ontology OBO file.
    """
    path = os.path.join(directory, 'data', 'taxrank.obo')
    with open(path, 'rt') as read_file:
        taxrank = obonet.read_obo(read_file)
    assert len(taxrank) == 61
    assert taxrank.nodes['TAXRANK:0000001']['name'] == 'phylum'
    assert 'NCBITaxon:kingdom' in taxrank.nodes['TAXRANK:0000017']['xref']
def test_read_taxrank_path(extension, pathlike):
    """
    Test reading the taxrank ontology OBO file from paths.
    Includes reading compressed paths.
    """
    path = os.path.join(directory, 'data', 'taxrank.obo' + extension)
    if pathlike:
        path = pathlib.Path(path)
    taxrank = obonet.read_obo(path)
    assert len(taxrank) == 61
def verify_obo(path):
    """
    Verifies that the provided obo is a HUPO-PSI obo.

    :param path: path to obo file
    :return: whether the file schema is HUPO-PSI
    :rtype: bool
    """
    loaded = obonet.read_obo(path)
    return all([
        loaded.graph['ontology'] == 'ms',  # MS ontology
        'publisher: HUPO Proteomics Standards Initiative Mass Spectrometry Standards Working Group and HUPO Proteomics '
        'Standards Initiative Proteomics Informatics Working Group' in loaded.graph['remark'],  # correct publisher
    ])
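# Usage sketch (assumes a local copy of the PSI-MS controlled vocabulary,
# e.g. downloaded from the HUPO-PSI repository):
if verify_obo("psi-ms.obo"):
    print("psi-ms.obo is the HUPO-PSI mass spectrometry ontology")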