Exemple #1
0
def obo(path: str, check: bool):
    """Write the CONSO ontology to ``path`` in OBO format.

    :param path: destination handed to ``write_obo``.
    :param check: when true, the written file is read back with
        ``obonet.read_obo``; the parsed graph is discarded.
    """
    get_obo().write_obo(path)
    if not check:
        return

    # Imported lazily so obonet is only needed when a check is requested.
    import obonet
    obonet.read_obo(path)
Exemple #2
0
def get_ontology(obo_link = '../ontologies/go-basic.obo', reverse_graph = "false"):
    """Build a plain directed graph of the ``is_a``/``part_of`` relations of an OBO file.

    :param obo_link: path or URL handed to ``obonet.read_obo``.
    :param reverse_graph: the string ``"true"`` (or the boolean ``True``)
        flips every kept edge; any other value keeps obonet's direction.
    :return: ``networkx.DiGraph`` whose edges carry a ``type`` attribute
        holding the relation name.
    """
    try:
        graph = obonet.read_obo(obo_link)
    except Exception as es:
        # Log the failure and try the same source once more; a second
        # failure propagates to the caller.
        logging.info(es)
        graph = obonet.read_obo(obo_link)
        #obo_link = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

    logging.info(obo_link)
    logging.info("Number of nodes: {}".format(graph.number_of_nodes()))

    flip = reverse_graph is True or reverse_graph == "true"
    kept_types = ("is_a", "part_of")
    reverseGraph = nx.DiGraph()

    ## generate whole graph first, we'll specialize later.
    # The edge key of the parsed multigraph is the relation name. Walking
    # the edges in insertion order makes the surviving ``type`` of a node
    # pair linked by both relations deterministic (it used to depend on set
    # iteration order).
    wholeset = set()
    for source, target, itype in graph.edges(keys=True):
        wholeset.add(itype)
        if itype not in kept_types:
            continue
        if flip:
            reverseGraph.add_edge(target, source, type=itype)
        else:
            reverseGraph.add_edge(source, target, type=itype)

    # ``nx.info`` no longer exists in NetworkX 3.x, so the summary is
    # assembled from the graph counters instead.
    logging.info("DiGraph with %d nodes and %d edges",
                 reverseGraph.number_of_nodes(),
                 reverseGraph.number_of_edges())
    tnum = len(wholeset)
    logging.info("Found {} unique edge types, {}".format(tnum, " | ".join(sorted(wholeset))))
    return reverseGraph
Exemple #3
0
def textualize_top_k_terms(json_data, mapping, obo_link, class_names,  k_number = 5):
    """
        Print the names of the *k_number* most important terms for each class (according to genQ).

        :param json_data: dict with a ``"resulting_generalization"`` entry
            keyed by class; every class entry provides a ``"terms"`` iterable.
        :param mapping: argument forwarded to ``read_generic_gaf``.
        :param obo_link: path or URL handed to ``obonet.read_obo``.
        :param class_names: unused, kept for interface compatibility.
        :param k_number: maximum number of terms printed per class. Classes
            with fewer eligible terms print only what they have.
    """
    try:
        graph = obonet.read_obo(obo_link)
    except Exception as es:
        # Log the failure and retry once; a second failure propagates.
        logging.info(es)
        graph = obonet.read_obo(obo_link)

    id_to_name = {id_: data.get('name') for id_, data in graph.nodes(data=True)}

    ## go through mapping: count how many times each term is annotated
    mc = {}
    mappings = read_generic_gaf(mapping)
    for annotated_terms in mappings.values():
        for el in annotated_terms:
            mc[el] = mc.get(el, 0) + 1
    # number of distinct terms seen in the mapping
    normalization = len(mc)

    limit = max(k_number, 0)
    for keyClass, class_entry in json_data["resulting_generalization"].items():
        print()
        if keyClass in ("average_depth", "average_association"):
            continue

        genQ_dict = {}
        for term in class_entry["terms"]:
            IC = IC_of_a_term(term, mappings, mc, normalization)
            genQ_dict[term] = 1 - IC / 9.82

        # Rank by descending genQ. Ties go to the term that appears later,
        # and negative scores are never selected -- both exactly as in the
        # previous repeated-maximum scan, which additionally raised KeyError
        # once it ran out of candidates.
        ranked = sorted(
            ((score, position, term)
             for position, (term, score) in enumerate(genQ_dict.items())
             if score >= 0),
            reverse=True,
        )
        for rank, (_, _, term) in enumerate(ranked[:limit]):
            if rank == 0:
                print("Class " + str(keyClass) + " :− " + str(id_to_name[term]))
            else:
                print("^" + str(id_to_name[term]))
def load_disease_file(disease_download_file, disease_output_file, disease_xref_output_file):
    """Convert a disease ontology OBO file into two tab-separated tables.

    :param disease_download_file: OBO source handed to ``obonet.read_obo``.
    :param disease_output_file: destination of the term table (one row per
        node, columns ``doid, name, definition, parents, link, source, license``).
    :param disease_xref_output_file: destination of the cross-reference
        table built from the rows collected by ``parse_ontology_xref``.
    """
    # parse ontology
    ont = obonet.read_obo(disease_download_file)

    # build child node lookup dictionary
    child_dict = build_child_dict(ont)

    # build filter (currently disabled, so every node is exported)
    #do_filter = get_infectious_diseases(ont, child_dict)
    do_filter = None

    xref_list = []
    with open(disease_output_file, "w", newline='') as outfile:
        writer = csv.writer(outfile, delimiter="\t")
        writer.writerow(["doid", "name", "definition", "parents", "link", "source", "license"])
        for id_, data in ont.nodes(data=True):
            if do_filter is not None and id_ not in do_filter:
                continue
            writer.writerow(parse_ontology_entry(id_, data))
            # appends this node's cross references to xref_list
            parse_ontology_xref(id_, data, xref_list)

    xref_df = pd.DataFrame(xref_list)
    xref_df.to_csv(disease_xref_output_file, sep="\t", index=False)
Exemple #5
0
def return_archived_ontology(version):
    '''
    Fetch the archived ChEBI ontology of the given release.

    ``version`` is the release number as a string; it is inserted into the
    EBI archive URL, which is then parsed with ``obonet.read_obo``.
    '''
    archive_url = ''.join((
        'ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel',
        version,
        '/ontology/chebi.obo',
    ))
    return obonet.read_obo(archive_url)
Exemple #6
0
    def update_resource(directory,
                        url,
                        prefix,
                        *args,
                        remove_prefix=False,
                        allowed_synonyms=None):
        """Regenerate the JSON resource file for one OBO ontology.

        The parsed graph is cached as ``<prefix>.obo.pkl`` inside
        ``directory``; the URL is only read when that cache file is missing.
        The entries extracted from the graph are pruned and dumped as JSON.
        """
        resource_path = _make_resource_path(directory, prefix)
        obo_path = os.path.join(directory, '%s.obo.pkl' % prefix)

        if not os.path.exists(obo_path):
            # No cache yet: parse the ontology and remember it for next time.
            g = obonet.read_obo(url)
            with open(obo_path, 'wb') as cache_file:
                pickle.dump(g, cache_file)
        else:
            with open(obo_path, 'rb') as cache_file:
                g = pickle.load(cache_file)

        raw_entries = OboClient.entries_from_graph(
            g,
            prefix=prefix,
            remove_prefix=remove_prefix,
            allowed_synonyms=allowed_synonyms,
        )
        optional_keys = {'synonyms', 'xrefs', 'alt_ids', 'relations'}
        pruned_entries = prune_empty_entries(raw_entries, optional_keys)

        with open(resource_path, 'w') as resource_file:
            json.dump(pruned_entries, resource_file, indent=1, sort_keys=True)
def _propagate_GO(gene_mapper_file, tree, out_file):
    """
    Propagate the GO terms of a gene-mapper table through the GO graph.

    Inputs:
    1) gene_mapper_file: tab-separated table with "Gene ID" and "GOs" columns
    2) tree: path of the GO graph in OBO format
    3) out_file: name of the output file

    Output:
    1) tab-separated table with the columns "Gene ID", "GOs" and
       "GOs_propagated", written to out_file
    """
    with open(tree, 'r') as obo_handle:
        go_graph = obonet.read_obo(obo_handle)

    gene_table = pd.read_csv(gene_mapper_file, sep="\t")

    # keep rows that name a gene, then keep only genes that have GO terms
    has_gene_id = gene_table['Gene ID'].notna()
    annotated = gene_table[has_gene_id][["Gene ID", "GOs"]].dropna()

    # the "GOs" cell is a comma-separated list of term ids
    go_lists = annotated['GOs'].str.split(',')
    annotated['GOs_propagated'] = go_lists.apply(propagate_go, go_graph=go_graph)

    annotated.to_csv(out_file, sep='\t', index=False)
Exemple #8
0
 def __init__(self, path):
     """
     Load the ontology found at ``path`` and locate its root.
     :param path: source handed to ``read_obo``
     """
     ontology_graph = read_obo(path)
     self.graph = ontology_graph
     # the root lookup runs after ``self.graph`` has been assigned
     self.root_id = self._find_root_id()
Exemple #9
0
def load_chebi(path="ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.obo"):
    """Load ChEBI and derive the lookup tables used for entity linking.

    An artificial root (module-level ``root_concept``) is added and linked
    to the four sub-ontology roots (module-level ``chemical_entity``,
    ``role``, ``subatomic_particle`` and ``application``).

    :param path: path or URL handed to ``obonet.read_obo``.
    :return: tuple ``(is_a_graph, name_to_id, synonym_to_id, id_to_name,
        id_to_index)`` where ``is_a_graph`` is a ``MultiDiGraph`` holding
        only the ``is_a`` edges, ``synonym_to_id`` maps a synonym to a list
        of ids, and ``id_to_index`` numbers the ids from 1 (``""`` maps to 0).
    """
    print("loading chebi from {}...".format(path))
    graph = obonet.read_obo(path)
    graph.add_node(root_concept, name="ROOT")
    for sub_root in (chemical_entity, role, subatomic_particle, application):
        graph.add_edge(sub_root, root_concept, edgetype='is_a')
    graph = graph.to_directed()

    # obonet documents the relation name as the edge *key*, while the edges
    # added above carry it as the ``edgetype`` attribute. Accept both, so
    # that parsed edges without an ``edgetype`` attribute no longer raise
    # KeyError.
    is_a_edges = [
        (u, v, d)
        for u, v, key, d in graph.edges(keys=True, data=True)
        if key == "is_a" or d.get('edgetype') == "is_a"
    ]
    is_a_graph = networkx.MultiDiGraph(is_a_edges)

    id_to_name = {id_: data['name'] for id_, data in graph.nodes(data=True)}
    name_to_id = {data['name']: id_ for id_, data in graph.nodes(data=True)}
    id_to_index = {e: i+1 for i, e in enumerate(graph.nodes())} # ids should start on 1 and not 0
    id_to_index[""] = 0

    synonym_to_id = {}
    print("synonyms to ids...")
    for node_id, data in graph.nodes(data=True):
        for syn in data.get("synonym", []):
            # the synonym text is the part between the first pair of quotes
            parts = syn.split('"')
            if len(parts) > 2:
                synonym_to_id.setdefault(parts[1], []).append(node_id)

    print("done.", len(name_to_id), "ids", len(synonym_to_id), "synonyms")
    return is_a_graph, name_to_id, synonym_to_id, id_to_name, id_to_index
Exemple #10
0
def parse_hpo_obo(path):
    """Read the HPO obo file at ``path``.

    Returns whatever ``obonet.read_obo`` produces for it (a networkx
    MultiDiGraph).
    """
    from obonet import read_obo
    return read_obo(path)
Exemple #11
0
def scHCL_adata(adata, verbose=False, n_cores=1, n_min=10):
    """
    Annotate ``adata`` against the HCL reference and attach Cell Ontology
    ids and names (previous main function).

    Returns the refined annotation frame and the extended cell-type frame
    produced by ``call_celltypes``. ``verbose`` is accepted but not used.
    """
    ref_df = reference_hcl.load_HCL_reference()
    transformed_adata = process_adata(adata, ref_df)
    scHCL_df, scHCL_df_extended_Celltypes = call_celltypes(
        transformed_adata, ref_df, n_cores)
    scHCL_df = annotate_refined(scHCL_df, n_min)

    # cell ontology: id -> name lookup
    cl_graph = obonet.read_obo('http://purl.obolibrary.org/obo/cl/cl-basic.obo')
    id_to_name = {}
    for id_, data in cl_graph.nodes(data=True):
        id_to_name[id_] = data.get('name')

    def _to_cl_id(celltype):
        # refined cell type -> Cell Ontology id, 'unknown' when unmapped
        lookup = reference_hcl.refined_celltypes_to_cell_ontology
        if celltype in lookup:
            return lookup[celltype]
        return 'unknown'

    def _to_cl_name(cl_id):
        # Cell Ontology id -> name, 'unknown' when the id is not in the graph
        if cl_id in id_to_name:
            return id_to_name[cl_id]
        return 'unknown'

    scHCL_df['CLid'] = scHCL_df['hcl_refined'].apply(_to_cl_id)
    scHCL_df['CL_name'] = scHCL_df['CLid'].apply(_to_cl_name)

    return scHCL_df, scHCL_df_extended_Celltypes
Exemple #12
0
def test_read_obo(ontology):
    """
    Reading the named OBO Foundry ontology must succeed and give a
    non-empty graph.
    """
    graph = obonet.read_obo(f'http://purl.obolibrary.org/obo/{ontology}.obo')
    assert graph
    def __init__(self,
                 obo_path=OBOURL,
                 ):
        """
        Loads and interprets a PSI-MS obo file into a python-interpretable format.

        :param obo_path: file path or url to an obo file
        :raises FileNotFoundError: when the file or URL cannot be found
        """
        CVParameterSet.__init__(self)
        try:
            self.obo_file = obonet.read_obo(obo_path)  # read the obo file
        except (FileNotFoundError, urllib.error.HTTPError) as error:
            # keep the original failure attached as the cause
            raise FileNotFoundError(
                f'An obo file could not be found at the provided path or URL: {obo_path}'
            ) from error
        if obo_path == OBOURL:  # remind the user to cite the publication
            print('Data was read from PSI-MS, please cite DOI: 10.1093/database/bat009')
        self.format_version = self.obo_file.graph['format-version']
        self.data_version = self.obo_file.graph['data-version']
        for acc in self.obo_file:
            # ``Graph.node`` no longer exists in NetworkX >= 2.4; ``nodes`` is
            # the supported accessor.
            dct = self.obo_file.nodes[acc]
            if 'def' in dct:
                # the key ``def`` is renamed (in the graph's own attribute
                # dict) before the attributes are passed on as keyword arguments
                dct['definition'] = dct.pop('def')
            self.cv_values[acc] = CVParam(
                id=acc,
                **dct,
            )
Exemple #14
0
def load(obo_file):
    """
    Load OBO file into a networkx graph and drop the non-phenotype branches.

    Read errors are reported through the module-level ``logger`` when it is
    not ``None`` (otherwise on stderr) and terminate the process with exit
    status 1.

    :param obo_file: OBO definition file.
    :return: `networkx.MultiDiGraph`
    """
    try:
        hpo_network = obonet.read_obo(obo_file)
    except (FileNotFoundError, PermissionError) as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        # ``sys.exit`` rather than the ``exit`` helper, which is only
        # injected by the ``site`` module.
        sys.exit(1)

    # roots for non-phenotype nodes
    non_phenotypes = {
        'mortality_aging': 'HP:0040006',
        'mode_of_inheritance': 'HP:0000005',
        'clinical_modifier': 'HP:0012823',
        'frequency': 'HP:0040279',
        'clinical_course': 'HP:0031797',
    }

    # remove non-phenotype branches
    for hpo_id in non_phenotypes.values():
        if hpo_id in hpo_network.nodes:
            # every node from which the branch root can be reached
            children = nx.ancestors(hpo_network, hpo_id)
            hpo_network.remove_nodes_from([hpo_id] + list(children))

    return hpo_network
    def handle(self, file: str, verbosity: int = 1, **options):
        """Validate the OBO file, parse it and store its typedefs.

        Progress messages and the progress bar are shown only when
        ``verbosity`` is greater than zero.
        """
        try:
            FileValidator().validate(file)
        except ImportingError as e:
            raise CommandError(e)

        # Load the ontology file
        with open(file) as obo_file:
            G = obonet.read_obo(obo_file)

        show_progress = verbosity > 0

        if show_progress:
            self.stdout.write("Preprocessing")

        # Initializing ontology
        cv_name = "relationship"
        ontology = OntologyLoader(cv_name)

        # Load typedefs as Dbxrefs and Cvterm
        if show_progress:
            self.stdout.write("Loading typedefs")

        for typedef in tqdm(G.graph["typedefs"], disable=not show_progress):
            ontology.store_type_def(typedef)

        if show_progress:
            self.stdout.write(self.style.SUCCESS("Done"))
Exemple #16
0
    def __init__(
        self,
        obo_path=OBOURL,
    ):
        """
        Read a PSI-MS obo file and expose its terms as ``CVParam`` objects.

        :param obo_path: file path or url to an obo file
        """
        CVParameterSet.__init__(self)
        try:
            self.obo_file = obonet.read_obo(obo_path)
        except (FileNotFoundError, urllib.error.HTTPError):
            message = f'An obo file could not be found at the provided path or URL: {obo_path}'
            raise FileNotFoundError(message)

        if obo_path == OBOURL:
            # the default source was used: remind the user to cite the publication
            print('Data was read from PSI-MS, please cite DOI: 10.1093/database/bat009')

        header = self.obo_file.graph
        self.format_version = header['format-version']
        self.data_version = header['data-version']

        for acc in self.obo_file:
            attributes = self.obo_file.nodes[acc]
            if 'def' in attributes:
                # the key ``def`` is renamed in place to ``definition``
                attributes['definition'] = attributes.pop('def')
            self.cv_values[acc] = CVParam(id=acc, **attributes)
    def test_store_type_def(self):
        """Tests - store type_def."""
        here = os.path.dirname(os.path.abspath(__file__))
        obo_path = os.path.join(here, "data", "so_fake.obo")

        with open(obo_path) as obo_file:
            G = obonet.read_obo(obo_file)

        # Initializing ontology from the header of the parsed file
        ontology = OntologyLoader(
            G.graph["default-namespace"][0], G.graph["data-version"]
        )
        for typedef in G.graph["typedefs"]:
            ontology.store_type_def(typedef)

        # Testing cv
        test_cv = Cv.objects.get(name="sequence")
        self.assertEqual("sequence", test_cv.name)
        self.assertEqual("so.obo(fake)", test_cv.definition)

        # Testing store_type_def
        test_db = Db.objects.get(name="_global")
        self.assertEqual("_global", test_db.name)
        test_dbxref = Dbxref.objects.get(db=test_db, accession="derives_from")
        self.assertEqual("derives_from", test_dbxref.accession)
        test_cvterm = Cvterm.objects.get(dbxref=test_dbxref)
        self.assertEqual("derives_from", test_cvterm.name)
        self.assertEqual(
            '"testing def loading." [PMID:999090909]', test_cvterm.definition
        )

        # the comment is stored as a property of the typedef term
        comment_type = Cvterm.objects.get(name="comment")
        test_comment = Cvtermprop.objects.get(
            cvterm_id=test_cvterm.cvterm_id, type_id=comment_type.cvterm_id
        )
        self.assertEqual("Fake typedef data.", test_comment.value)

        # each of these flags is expected to be stored with the value "1"
        for flag_name in ("is_class_level", "is_metadata_tag",
                          "is_symmetric", "is_transitive"):
            flag_type = Cvterm.objects.get(name=flag_name)
            flag_prop = Cvtermprop.objects.get(
                cvterm_id=test_cvterm.cvterm_id, type_id=flag_type.cvterm_id
            )
            self.assertEqual("1", flag_prop.value)

        xref_dbxref = Dbxref.objects.get(accession="0123")
        test_cvterm_dbxref = CvtermDbxref.objects.get(
            cvterm=test_cvterm, dbxref=xref_dbxref
        )
        self.assertEqual(0, test_cvterm_dbxref.is_for_definition)
Exemple #18
0
    def run(self):
        """Write the HPO terms and their ``is_a`` links as node and edge tables.

        ``hp.obo`` is read from ``self.input_base_dir``. Each term becomes
        one node row and each ``is_a`` parent one edge row. Both output
        files start with their tab-joined header line.
        """
        self.node_header.extend(["comments", "description"])
        hpo_obo_file = os.path.join(self.input_base_dir, "hp.obo")
        node_type = "biolink:PhenotypicFeature"
        edge_label = "rdfs:subClassOf"
        ro_relation = "RO:0002351"

        with open(self.output_node_file, 'w') as node_out, \
             open(self.output_edge_file, 'w') as edge_out:

            # headers first
            node_out.write("\t".join(self.node_header) + "\n")
            edge_out.write("\t".join(self.edge_header) + "\n")

            ontology = obonet.read_obo(hpo_obo_file)

            for term_id, attributes in ontology.nodes(data=True):
                self.write_hpo_node(node_out, term_id, attributes, node_type)

                # one parent-child edge per is_a entry (none when absent)
                for parent in attributes.get('is_a', []):
                    self.write_hpo_edge(edge_out, term_id, edge_label, parent,
                                        ro_relation)
Exemple #19
0
def _sample_graph(prefix):
    """Print up to ten of an ontology's own term ids and guess the id width.

    :param prefix: lower-case OBO Foundry prefix; the ontology is fetched
        from ``http://purl.obolibrary.org/obo/<prefix>.obo``.
    :return: ``False`` when the ontology cannot be fetched or parsed; the
        sampled id length minus the prefix and one separator character when
        all sampled ids have the same length; ``None`` when no own terms
        exist or their lengths differ.
    """
    from obonet import read_obo
    from urllib.error import HTTPError
    url = f'http://purl.obolibrary.org/obo/{prefix}.obo'
    try:
        graph = read_obo(url)
    except HTTPError:
        print(
            f'{prefix} URL invalid {url}. See: http://www.obofoundry.org/ontology/{prefix}'
        )
        return False
    except ValueError:
        print(
            f'Issue parsing {url}. See: http://www.obofoundry.org/ontology/{prefix}'
        )
        return False

    own_terms = (node for node in graph if node.lower().startswith(prefix))
    # take at most ten examples
    nodes = [node for node, _ in zip(own_terms, range(10))]
    if not nodes:
        print(f'No own terms in {prefix}')
        # Nothing to measure; previously execution fell through to
        # ``nodes[0]`` below and raised IndexError.
        return None
    for node in nodes:
        print('  example', node)

    first_length = len(nodes[0])
    if all(first_length == len(node) for node in nodes[1:]):
        return first_length - 1 - len(prefix)
    return None
Exemple #20
0
def load_chebi():
    """Load ChEBI ontology from the local file 'chebi.obo'.

    Ensures:
        ontology_graph: is a MultiDiGraph object from Networkx holding the "is-a" edges of ChEBI plus an artificial root;
        name_to_id: is dict with mappings between each ontology concept name and the respective ChEBI id;
        synonym_to_id: is dict with mappings between each synonym of a concept and the respective ChEBI id;
    """

    print("Loading ChEBI ontology...")

    graph = obonet.read_obo("chebi.obo")  # Load the ontology from local file

    # Add root concept to the graph
    root_concept = "CHEBI:00000"
    graph.add_node(root_concept, name="ROOT")
    graph = graph.to_directed()

    # Create mappings
    name_to_id, synonym_to_id, edge_list = {}, {}, []

    for node_id, data in graph.nodes(data=True):
        name_to_id[data["name"]] = node_id

        # Build the edge_list with only "is-a" relationships (the root
        # nodes of the ontology do not have any)
        for related_node in data.get('is_a', ()):
            edge_list.append((node_id, related_node))

        for synonym in data.get("synonym", ()):
            # The synonym text follows the first double quote; entries
            # without any quote are skipped instead of raising IndexError.
            pieces = synonym.split("\"")
            if len(pieces) > 1:
                synonym_to_id[pieces[1]] = node_id

    # Create a MultiDiGraph object with only "is-a" relations - this will allow the further calculation of shortest path length
    ontology_graph = nx.MultiDiGraph(edge_list)

    # Add edges between the ontology root and sub-ontology roots
    chemical_entity = "CHEBI:24431"
    role = "CHEBI:50906"
    subatomic_particle = "CHEBI:36342"
    application = "CHEBI:33232"
    ontology_graph.add_node(root_concept, name="ROOT")
    for sub_root in (chemical_entity, role, subatomic_particle, application):
        ontology_graph.add_edge(sub_root, root_concept, edgetype='is_a')

    print("Is ontology_graph acyclic:",
          nx.is_directed_acyclic_graph(ontology_graph))
    print("ChEBI loading complete")

    return ontology_graph, name_to_id, synonym_to_id
    def test_store_relationship(self):
        """Tests - store relationship."""
        here = os.path.dirname(os.path.abspath(__file__))
        obo_path = os.path.join(here, "data", "so_fake.obo")

        with open(obo_path) as obo_file:
            G = obonet.read_obo(obo_file)

        # Initializing ontology from the header of the parsed file, then
        # loading typedefs, terms and relationships in that order
        ontology = OntologyLoader(
            G.graph["default-namespace"][0], G.graph["data-version"]
        )
        for typedef in G.graph["typedefs"]:
            ontology.store_type_def(typedef)
        for term_id, term_data in G.nodes(data=True):
            ontology.store_term(term_id, term_data)
        for subject_id, object_id, relation in G.edges(keys=True):
            ontology.store_relationship(subject_id, object_id, relation)

        # Testing store_term
        test_subject_dbxref = Dbxref.objects.get(accession="0000013")
        test_subject_cvterm = Cvterm.objects.get(dbxref=test_subject_dbxref)
        self.assertEqual("scRNA", test_subject_cvterm.name)
        test_object_dbxref = Dbxref.objects.get(accession="0000012")
        test_object_cvterm = Cvterm.objects.get(dbxref=test_object_dbxref)
        self.assertEqual("scRNA_primary_transcript", test_object_cvterm.name)

        # Testing store_relationship
        stored_relationship = CvtermRelationship.objects.get(
            subject=test_subject_cvterm, object=test_object_cvterm
        )
        relationship_type = Cvterm.objects.get(
            cvterm_id=stored_relationship.type_id
        )
        self.assertEqual("derives_from", relationship_type.name)
Exemple #22
0
def test_read_obo(ontology):
    """
    Reading the named OBO Foundry ontology must succeed and give a
    non-empty graph.
    """
    graph = obonet.read_obo(f'http://purl.obolibrary.org/obo/{ontology}.obo')
    assert graph
Exemple #23
0
def main():
    """
    Main block: parse the command line, read the HPO ontology and write it
    as a two-column TSV (to stdout unless ``--outfile`` is given).
    """

    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--obo',
                        help='Path to HPO .obo file. [default: ' +
                        'http://purl.obolibrary.org/obo/hp.obo]',
                        default='http://purl.obolibrary.org/obo/hp.obo',
                        metavar='(file|url)')
    parser.add_argument('-o',
                        '--outfile',
                        help='Path to outfile. ' + '[default: stdout]',
                        metavar='file')
    args = parser.parse_args()

    # Open connection to obo
    hpo_g = obonet.read_obo(args.obo)

    # Open connection to outfile
    to_stdout = args.outfile is None
    outfile = stdout if to_stdout else open(args.outfile, 'w')
    try:
        # Add header to outfile
        outfile.write('#HPO\tdescription\n')

        # Convert obo to tsv
        obo2tsv(hpo_g, outfile)
    finally:
        # Close only a file opened here; stdout stays open for the caller.
        if not to_stdout:
            outfile.close()
Exemple #24
0
def get_obo_graph(dir, restrict_go_nodes_list):
    """Read an OBO graph and index its alternative ids.

    :param dir: path or URL handed to ``obonet.read_obo``.
    :param restrict_go_nodes_list: when truthy, the list returned by
        ``import_go_nodes_list`` is resolved against the graph.
    :return: tuple ``(obo_graph, alt_ids_dic, curated_go_nodes_list)``.
        ``alt_ids_dic`` maps every alternative id to its primary node id.
        ``curated_go_nodes_list`` is the set of primary ids found for the
        imported list, or ``None`` when no restriction was requested
        (this case used to raise ``UnboundLocalError``).
    """
    obo_graph = obonet.read_obo(dir)
    alt_ids_dic = {}
    for idx, node in obo_graph.nodes(data=True):
        if 'alt_id' in node:
            if isinstance(node['alt_id'], list):
                for alt_id in node['alt_id']:
                    alt_ids_dic[alt_id] = idx
            else:
                alt_ids_dic[node['alt_id']] = idx

    curated_go_nodes_list = None
    if restrict_go_nodes_list:
        go_nodes_list = import_go_nodes_list()
        curated_go_nodes_list = set()
        for node in go_nodes_list:
            if node in alt_ids_dic:
                # alternative ids are replaced by their primary id
                curated_go_nodes_list.add(alt_ids_dic[node])
            elif node in obo_graph.nodes():
                curated_go_nodes_list.add(node)
        # TAKES TOO LONG; BETTER WORK WITH WHOLE GRAPH
        # Refactor graph to reconnect nodes linked to deleted nodes.
        # nodes_to_remove = []
        # for node in obo_graph.nodes().keys():
        #     if node not in curated_go_nodes_list \
        #             and len(obo_graph._pred[node]) > 0 and len(obo_graph._succ[node]) > 0:
        #         for incoming_node in obo_graph._pred[node]:
        #             for outgoing_node, value in obo_graph._succ[node].items():
        #                 obo_graph.add_edge(incoming_node, outgoing_node, key=list(value)[0])
        #         nodes_to_remove.append(node)
        # obo_graph.remove_nodes_from(curated_go_nodes_list)
    return obo_graph, alt_ids_dic, curated_go_nodes_list
Exemple #25
0
    def __init__(self, obo_file, hpo2gene, disease2hpo):
        """Load the HPO ontology together with its gene and disease annotations.

        :param obo_file: OBO source handed to ``obonet.read_obo``.
        :param hpo2gene: tab-separated annotation file; columns 0 and 3 are
            read as ``HPO_id`` and ``gene``.
        :param disease2hpo: tab-separated annotation file; columns 0, 1, 2
            and 4 are read as ``source``, ``disease_id``, ``disease`` and
            ``HPO_id``.

        Lines starting with ``#`` are skipped in both annotation files.
        """
        self._obofile = obo_file
        self._disease_df = pd.read_csv(
            disease2hpo,
            sep="\t",
            usecols=[0, 1, 2, 4],
            names=['source', 'disease_id', 'disease', 'HPO_id'],
            comment="#")
        self._gene_df = pd.read_csv(hpo2gene,
                                    sep="\t",
                                    usecols=[0, 3],
                                    names=['HPO_id', 'gene'],
                                    comment="#")
        # one row per HPO term with the list of its genes
        self._HPO2gene = self._gene_df.groupby(
            by='HPO_id')['gene'].apply(list).reset_index(name='genes')
        # attach the gene lists to every disease/term row ...
        self._disease2genes = self._disease_df.merge(self._HPO2gene,
                                                     on='HPO_id')
        # ... and sum (i.e. concatenate) the lists per (source, disease_id)
        self._disease2genes = self._disease2genes.groupby(
            by=['source', 'disease_id'])['genes'].agg(
                genes=pd.NamedAgg(column='genes', aggfunc='sum'))

        # The indexes are set only after the merge/groupby steps above,
        # which still need 'HPO_id' and 'gene' as ordinary columns.
        self._disease_df.set_index(['source', 'disease_id'], inplace=True)
        self._gene_df.set_index('gene', inplace=True)
        self._HPO2gene.set_index('HPO_id', inplace=True)

        # NOTE(review): this unpacks the result of read_obo into two values
        # (ontology, obsoletes); upstream obonet documents a single graph as
        # the return value -- presumably a patched obonet is in use, confirm.
        self._ontology, self._obsoletes = obonet.read_obo(obo_file)
        # id -> name (None when a node has no name)
        self.id_to_name = {
            id_: data.get('name')
            for id_, data in self._ontology.nodes(data=True)
        }
        # name -> id, restricted to nodes that have a name
        self.name_to_id = {
            data['name']: id_
            for id_, data in self._ontology.nodes(data=True) if 'name' in data
        }
        self.n_terms = len(self._ontology)
Exemple #26
0
def _get_obo_via_obonet(prefix: str,
                        *,
                        url: Optional[str] = None,
                        local: bool = False) -> Obo:
    """Parse the OBO file of ``prefix`` with obonet and wrap it as ``Obo``.

    Without ``url`` the path comes from ``_ensure_obo_path``. With ``url``
    and ``local`` the URL is used as the path directly. Otherwise the URL
    is downloaded to the prefix's OBO path unless that file already exists.
    """
    if url is None:
        path = _ensure_obo_path(prefix)
    elif local:
        path = url
    else:
        path = get_prefix_obo_path(prefix)
        already_downloaded = os.path.exists(path)
        if not already_downloaded:
            logger.info('[%s] downloading OBO from %s to %s', prefix, url,
                        path)
            urlretrieve(url, path)

    logger.info('[%s] parsing with obonet from %s', prefix, path)
    with open(path) as file:
        progress = tqdm(file, unit_scale=True, desc=f'[{prefix}] parsing obo')
        graph = obonet.read_obo(progress)

    # Make sure the graph header names an ontology: a missing or
    # non-alphabetic value is replaced with the prefix.
    header = graph.graph
    if 'ontology' in header:
        if not header['ontology'].isalpha():
            logger.warning(
                '[%s] ontology=%s has a strange format. replacing with prefix',
                prefix, header['ontology'])
            header['ontology'] = prefix
    else:
        logger.warning('[%s] missing "ontology" key', prefix)
        header['ontology'] = prefix
    return Obo.from_obonet(graph)
Exemple #27
0
 def load_network(self, file_resources):
     """Read the ontology graph from the ``.obo`` entry of ``file_resources``.

     :param file_resources: mapping of file name to a source accepted by
         ``obonet.read_obo``. When several names contain ".obo", the last
         one wins.
     :return: tuple ``(network, node_list)`` with the parsed graph and a
         numpy array of its nodes.
     :raises ValueError: when no name contains ".obo" (this used to
         surface as an ``UnboundLocalError``).
     """
     network = None
     for file in file_resources:
         if ".obo" in file:
             network = obonet.read_obo(file_resources[file])
             # network = network.reverse(copy=True)
     if network is None:
         raise ValueError("file_resources contains no '.obo' entry")
     node_list = np.array(network.nodes)
     return network, node_list
Exemple #28
0
 def __init__(
     self,
     url='https://raw.githubusercontent.com/The-Sequence-Ontology/SO-Ontologies/master/so.obo'
 ):
     """Load the ontology at ``url`` and start with an empty ancestor cache.

     The loaded graph is asserted to be a directed acyclic graph.
     """
     self.url = url
     ontology = obonet.read_obo(url)
     self.graph = ontology
     assert networkx.is_directed_acyclic_graph(ontology)
     self.ancestor_cache = {}
Exemple #29
0
def test_read_taxrank_path(extension):
    """
    The taxrank OBO file must load from a path, for every (possibly
    compressed) file extension, and contain 61 terms.
    """
    filename = 'taxrank.obo' + extension
    taxrank = obonet.read_obo(os.path.join(directory, 'data', filename))
    assert len(taxrank) == 61
Exemple #30
0
def test_read_brenda_subset():
    """
    A subset of BrendaTissue.obo, which does not set the ontology tag,
    must load with one term and a graph name of None.
    See https://github.com/dhimmel/obonet/issues/10.
    """
    subset_path = os.path.join(directory, 'data', 'brenda-subset.obo')
    graph = obonet.read_obo(subset_path)
    assert len(graph) == 1
    assert graph.graph['name'] is None
Exemple #31
0
def test_read_taxrank_url(extension):
    """
    The taxrank OBO file must load from a URL, for every (possibly
    compressed) file extension, and contain 61 terms.
    """
    base_url = 'https://github.com/dhimmel/obonet/raw/master/tests/data/taxrank.obo'
    taxrank = obonet.read_obo(base_url + extension)
    assert len(taxrank) == 61
Exemple #32
0
def test_presence_of_obsolete_nodes():
    """Obsolete entries must be kept when ``ignore_obsolete=False``."""
    pytest.importorskip("networkx", minversion="2.0")
    obsolete_id = 'BTO:0000311'
    subset_path = os.path.join(directory, 'data', 'brenda-subset.obo')
    graph = obonet.read_obo(subset_path, ignore_obsolete=False)
    node_view = graph.nodes(data=True)
    assert obsolete_id in node_view
    assert node_view[obsolete_id]['is_obsolete'] == 'true'
Exemple #33
0
def test_read_brenda_subset():
    """
    A subset of BrendaTissue.obo, which does not set the ontology tag,
    must load with one term and a graph name of None.
    See https://github.com/dhimmel/obonet/issues/10.
    """
    subset_path = os.path.join(directory, 'data', 'brenda-subset.obo')
    graph = obonet.read_obo(subset_path)
    assert len(graph) == 1
    assert graph.graph['name'] is None
Exemple #34
0
def test_read_taxrank_url(extension):
    """
    The taxrank OBO file must load from a URL, for every (possibly
    compressed) file extension, and contain 61 terms.
    """
    base_url = 'https://github.com/dhimmel/obonet/raw/master/tests/data/taxrank.obo'
    taxrank = obonet.read_obo(base_url + extension)
    assert len(taxrank) == 61
Exemple #35
0
def test_read_taxrank_file():
    """
    Test reading the taxrank ontology OBO file from an open file object.
    """
    path = os.path.join(directory, 'data', 'taxrank.obo')
    with open(path, 'rt') as read_file:
        taxrank = obonet.read_obo(read_file)
    assert len(taxrank) == 61
    # ``Graph.node`` no longer exists in NetworkX >= 2.4; ``nodes`` is the
    # supported accessor.
    assert taxrank.nodes['TAXRANK:0000001']['name'] == 'phylum'
    assert 'NCBITaxon:kingdom' in taxrank.nodes['TAXRANK:0000017']['xref']
Exemple #36
0
def test_read_taxrank_path(extension, pathlike):
    """
    The taxrank OBO file must load from a path -- given as a string or,
    when ``pathlike`` is set, as a ``pathlib.Path`` -- for every (possibly
    compressed) file extension, and contain 61 terms.
    """
    filename = 'taxrank.obo' + extension
    location = os.path.join(directory, 'data', filename)
    source = pathlib.Path(location) if pathlike else location
    taxrank = obonet.read_obo(source)
    assert len(taxrank) == 61
def verify_obo(path):
    """
    Check whether the obo at ``path`` is a HUPO-PSI obo: its ``ontology``
    header must be ``ms`` and its ``remark`` header must contain the
    HUPO-PSI publisher line.

    :param path: path to obo file
    :return: whether the file schema is HUPO-PSI
    :rtype: bool
    """
    header = obonet.read_obo(path).graph
    expected_publisher = (
        'publisher: HUPO Proteomics Standards Initiative Mass Spectrometry Standards Working Group and HUPO Proteomics '
        'Standards Initiative Proteomics Informatics Working Group'
    )
    # Both header fields are looked up before combining the results, so a
    # missing key raises regardless of the other check's outcome.
    is_ms_ontology = header['ontology'] == 'ms'
    has_publisher = expected_publisher in header['remark']
    return is_ms_ontology and has_publisher