Beispiel #1
0
    def _get_phenotypicseries_parents(entry, g):
        """
        Extract the phenotypic series parent relationship out of the entry.

        Series numbers are collected only when
        ``entry['phenotypicSeriesExists']`` is ``True``.  They are read from
        the entry's own ``phenotypeMapList`` and from the ``phenotypeMapList``
        nested under ``geneMap``.  Each series is added to the graph as an
        ``OMIM:`` class and the entry is made a subclass of it.

        :param entry: dict-like OMIM entry; must contain 'mimNumber'
        :param g: graph handed to ``Model``
        :return: None
        """
        model = Model(g)
        omimid = 'OMIM:' + str(entry['mimNumber'])
        # the phenotypic series mappings
        serieslist = []
        if 'phenotypicSeriesExists' in entry \
                and entry['phenotypicSeriesExists'] is True:
            # entry-level maps first, then the ones under geneMap
            phenolists = []
            if 'phenotypeMapList' in entry:
                phenolists.append(entry['phenotypeMapList'])
            if 'geneMap' in entry and \
                    'phenotypeMapList' in entry['geneMap']:
                phenolists.append(entry['geneMap']['phenotypeMapList'])
            for phenolist in phenolists:
                for p in phenolist:
                    phenomap = p['phenotypeMap']
                    # not every phenotype map carries a series number;
                    # skip those instead of raising KeyError
                    if 'phenotypicSeriesNumber' in phenomap:
                        serieslist.append(phenomap['phenotypicSeriesNumber'])
        # add this entry as a subclass of the series entry
        for ser in serieslist:
            series_id = 'OMIM:' + ser
            model.addClassToGraph(series_id, None)
            model.addSubClass(omimid, series_id)
Beispiel #2
0
    def _get_phenotypicseries_parents(entry, graph):
        """
        Link an OMIM entry to the phenotypic series it is mapped to.

        Nothing is collected unless ``entry['phenotypicSeriesExists']`` is
        present and truthy.  Series numbers (comma separated) are read from
        the entry's own ``phenotypeMapList`` and from the one nested under
        ``geneMap``.  Each one is added as an ``OMIMPS:`` class and the
        entry is made a subclass of it.

        :param entry: dict-like OMIM entry; must contain 'mimNumber'
        :param graph: graph handed to ``Model``
        :return: None
        """

        def _series_numbers(map_list):
            # yield every comma separated series number in one map list
            for item in map_list:
                pheno_map = item['phenotypeMap']
                if 'phenotypicSeriesNumber' in pheno_map:
                    yield from pheno_map['phenotypicSeriesNumber'].split(',')

        model = Model(graph)
        omim_curie = 'OMIM:' + str(entry['mimNumber'])

        # the phenotypic series mappings
        collected = []
        has_series = ('phenotypicSeriesExists' in entry
                      and entry['phenotypicSeriesExists'])
        if has_series:
            if 'phenotypeMapList' in entry:
                collected.extend(_series_numbers(entry['phenotypeMapList']))
            if 'geneMap' in entry and 'phenotypeMapList' in entry['geneMap']:
                collected.extend(
                    _series_numbers(entry['geneMap']['phenotypeMapList']))

        # add this entry as a subclass of the series entry
        for number in collected:
            parent_curie = 'OMIMPS:' + number
            model.addClassToGraph(parent_curie, None)
            model.addSubClass(omim_curie, parent_curie)
Beispiel #3
0
    def _get_phenotypicseries_parents(entry, graph):
        """
        Make an OMIM entry a subclass of each phenotypic series it maps to.

        Series numbers (comma separated) are read from the entry's own
        ``phenotypeMapList`` and from the one nested under ``geneMap``.
        The total number found is logged, then each distinct series is
        added as an ``OMIMPS:`` class with the entry as its subclass.

        :param entry: dict-like OMIM entry; must contain 'mimNumber'
        :param graph: graph handed to ``Model``
        :return: None
        """

        def _series_numbers(map_list):
            # flatten the comma separated series numbers of one map list
            return [
                number
                for item in map_list
                if 'phenotypicSeriesNumber' in item['phenotypeMap']
                for number in
                item['phenotypeMap']['phenotypicSeriesNumber'].split(',')
            ]

        model = Model(graph)
        omim_curie = 'OMIM:' + str(entry['mimNumber'])

        # the phenotypic series mappings
        found = []
        if 'phenotypeMapList' in entry:
            found += _series_numbers(entry['phenotypeMapList'])
        if 'geneMap' in entry and 'phenotypeMapList' in entry['geneMap']:
            found += _series_numbers(entry['geneMap']['phenotypeMapList'])

        if found:
            LOG.info(
                '%s is awarded %i optional PS superclasses!',
                omim_curie, len(found))

        # add this omim entry as a subclass of each distinct series entry
        for series_num in set(found):
            parent_curie = 'OMIMPS:' + series_num
            model.addClassToGraph(parent_curie, None)
            model.addSubClass(omim_curie, parent_curie)
Beispiel #4
0
    def _get_phenotypicseries_parents(self, entry, graph):
        """
        Attach an OMIM entry to each phenotypic series it is mapped to.

        Series numbers (comma separated) are read from the entry's own
        ``phenotypeMapList`` and from the one nested under ``geneMap``.
        Each distinct series becomes an ``OMIMPS:`` class.  The link written
        depends on the entry's type in ``self.omim_type``:
        'gene' / 'has_affected_feature' types get a
        'contributes to condition' triple,
        'phenotype' / 'heritable_phenotypic_marker' types become subclasses,
        and any other type is only logged.

        :param entry: dict-like OMIM entry; must contain 'mimNumber'
        :param graph: graph handed to ``Model``
        :return: None
        """

        def _series_numbers(map_list):
            # yield every comma separated series number in one map list
            for item in map_list:
                pheno_map = item['phenotypeMap']
                if 'phenotypicSeriesNumber' in pheno_map:
                    yield from pheno_map['phenotypicSeriesNumber'].split(',')

        model = Model(graph)
        omim_num = str(entry['mimNumber'])
        omim_curie = 'OMIM:' + omim_num
        omimtype = self.omim_type[omim_num]

        # the phenotypic series mappings
        found = []
        if 'phenotypeMapList' in entry:
            found.extend(_series_numbers(entry['phenotypeMapList']))
        if 'geneMap' in entry and 'phenotypeMapList' in entry['geneMap']:
            found.extend(
                _series_numbers(entry['geneMap']['phenotypeMapList']))

        if found:
            LOG.info(
                '%s is awarded %i optional PS superclasses!',
                omim_curie, len(found))

        # relate this omim entry to each distinct series entry
        for series_num in set(found):
            series_curie = 'OMIMPS:' + series_num
            model.addClassToGraph(
                series_curie, None, class_category=blv.terms['Disease'])

            if omimtype in (self.globaltt['gene'],
                            self.globaltt['has_affected_feature']):
                model.addTriple(
                    omim_curie,
                    self.globaltt['contributes to condition'],
                    series_curie)
                continue

            if omimtype in (self.globaltt['phenotype'],
                            self.globaltt['heritable_phenotypic_marker']):
                model.addSubClass(
                    omim_curie,
                    series_curie,
                    child_category=blv.terms['Disease'],
                    parent_category=blv.terms['Disease'])
                continue

            LOG.info('Unable to map type %s to phenotypic series', omimtype)
Beispiel #5
0
    def _get_equivids(self, limit):
        """
        Map GeneReviews books (NBK ids) to the OMIM phenotypes they cover.

        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Where each of the rows represents a mapping between
        a gr id and an omim id. These are a 1:many relationship,
        and some of the omim ids are genes (not diseases).
        Therefore, we need to create a loose coupling here.
        We make the assumption that these NBKs are generally higher-level
        grouping classes; therefore the OMIM ids are treated as subclasses,
        but only those OMIM numbers which ``self.omim_type`` types as
        phenotypes.  Numbers listed in ``self.omim_replaced`` are swapped
        for their replacements and obsolete numbers are dropped first.

        :param limit: stop reading once the reader's line number exceeds
            this value (ignored in test mode or when None)
        :return: None
        """
        raw = '/'.join((self.rawdir, self.files['idmap']['file']))
        model = Model(self.graph)
        LOG.info('Looping over %s', raw)
        id_map = {}
        allomimids = set()
        col = ['NBK_id', 'GR_shortname', 'OMIM']

        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            header = next(filereader)
            # drop the first character of the header row
            # (the '#' in the documented file format)
            header[0] = header[0][1:]
            if header != col:
                LOG.error('\nExpected header: %s\nRecieved header: %s', col,
                          header)
                exit(-1)

            for row in filereader:
                nbk_num = row[col.index('NBK_id')]
                shortname = row[col.index('GR_shortname')]
                # clean the number before it is used to build the curie,
                # so the test-mode filter sees the same id as everything else
                omim_num = row[col.index('OMIM')].strip()
                gr_id = 'GeneReviews:' + nbk_num
                omim_id = 'OMIM:' + omim_num
                # in test mode only rows for the test ids are kept
                if self.test_mode and omim_id not in self.test_ids:
                    continue

                # sometimes there's bad omim nums
                if len(omim_num) > 6:
                    LOG.warning(
                        "OMIM number incorrectly formatted in row %d; skipping:\n%s",
                        filereader.line_num, '\t'.join(row))
                    continue

                # build up a hashmap of the mappings; then process later
                id_map.setdefault(nbk_num, set()).add(omim_num)

                # add the class along with the shortname
                model.addClassToGraph(gr_id, None)
                model.addSynonym(gr_id, shortname)

                allomimids.add(omim_num)

                if not self.test_mode and limit is not None \
                        and filereader.line_num > limit:
                    break

        # given all_omim_ids from GR,
        # we want to update any which are changed or removed
        # before deciding which are disease / phenotypes
        replaced = allomimids & self.omim_replaced.keys()
        if replaced:
            LOG.warning("These OMIM ID's are past their pull date: %s",
                        str(replaced))
            for oid in replaced:
                allomimids.remove(oid)
                for rep in self.omim_replaced[oid]:
                    # a replacement given as a plain string must be added
                    # whole; set.update() would split it into characters
                    if isinstance(rep, str):
                        allomimids.add(rep)
                    else:
                        allomimids.update(rep)

        # guard against omim identifiers which have been removed
        obsolete = {
            o for o in self.omim_type
            if self.omim_type[o] == self.globaltt['obsolete']
        }
        removed = allomimids & obsolete
        if removed:
            LOG.warning("These OMIM ID's are gone: %s", str(removed))
            allomimids -= removed

        # filter for disease /phenotype types (we can argue about what is included)
        phenotype_types = (
            self.globaltt['Phenotype'],
            self.globaltt['has_affected_feature'],  # both a gene and a phenotype
            self.globaltt['heritable_phenotypic_marker'],  # probable phenotype
        )
        omim_phenotypes = {
            omim for omim in self.omim_type
            if self.omim_type[omim] in phenotype_types
        }
        LOG.info("Have %i omim_ids globally typed as phenotypes from OMIM",
                 len(omim_phenotypes))

        entries_that_are_phenotypes = allomimids & omim_phenotypes
        LOG.info("Filtered out %d/%d entries that are genes or features",
                 len(allomimids - entries_that_are_phenotypes),
                 len(allomimids))

        for nbk_num in self.book_ids:
            gr_id = 'GeneReviews:' + nbk_num
            for omim_num in id_map.get(nbk_num, ()):
                # add the gene reviews as a superclass to the omim id,
                # but only if the omim id is not a gene.
                # entries_that_are_phenotypes holds bare OMIM numbers,
                # so the number is tested, not the prefixed curie
                if omim_num in entries_that_are_phenotypes:
                    omim_id = 'OMIM:' + omim_num
                    model.addClassToGraph(omim_id, None)
                    model.addSubClass(omim_id, gr_id)
            # add this as a generic subclass  -- TEC: this is the job of inference
            model.addSubClass(gr_id, self.globaltt['disease'])
Beispiel #6
0
class Pathway():
    """
    Convenience helpers for placing genes and gene products in pathways.
    """
    def __init__(self, graph):
        """
        Keep the graph and the lookup tables it carries.

        :param graph: must be a ``Graph`` instance
        :raises ValueError: when ``graph`` is not a ``Graph``
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map
        self.gut = GraphUtils(self.curie_map)

    def addPathway(self,
                   pathway_id,
                   pathway_label,
                   pathway_type=None,
                   pathway_description=None):
        """
        Add a pathway as a class and make it a subclass of the term stored
        under ``globaltt['pathway']``.

        :param pathway_id:
        :param pathway_label:
        :param pathway_type: class type; when None the term stored under
            ``globaltt['cellular_process']`` is used
        :param pathway_description:
        :return: None
        """
        class_type = (self.globaltt['cellular_process']
                      if pathway_type is None else pathway_type)
        self.model.addClassToGraph(
            pathway_id, pathway_label, class_type, pathway_description)
        self.model.addSubClass(pathway_id, self.globaltt['pathway'])

    def addGeneToPathway(self, gene_id, pathway_id):
        """
        Put a gene in a pathway by way of an intermediate blank node
        standing for its gene product.

        gene_id RO:has_gene_product _gene_product
        _gene_product RO:involved_in pathway_id

        The blank node id is '_:' followed by the digest of the gene id
        (colons removed) plus 'product'.  The node is also given a
        ``globaltt['label']`` triple whose object is the pathway id.

        :param gene_id:
        :param pathway_id:
        :return: None
        """
        # bnode
        digest = self.gut.digest_id(gene_id.replace(':', '') + 'product')
        product_bnode = '_:' + digest
        self.model.addIndividualToGraph(
            product_bnode, None, self.globaltt['gene_product'])
        self.graph.addTriple(
            product_bnode, self.globaltt['label'], pathway_id)

        self.graph.addTriple(
            gene_id, self.globaltt['has gene product'], product_bnode)
        self.addComponentToPathway(product_bnode, pathway_id)

    def addComponentToPathway(self, component_id, pathway_id):
        """
        State that a component is involved in a pathway.

        Use this directly when the component itself takes part in the
        pathway.  If a transforming event is performed on the component
        first, then addGeneToPathway should be used instead.

        :param component_id:
        :param pathway_id:
        :return: None
        """
        predicate = self.globaltt['involved in']
        self.graph.addTriple(component_id, predicate, pathway_id)
Beispiel #7
0
class Pathway():
    """
    This provides convenience methods to deal with gene and protein collections
    in the context of pathways.
    """

    # ontology classes used when typing pathways and their parts
    pathway_parts = {
        'signal_transduction': 'GO:0007165',
        'cellular_process': 'GO:0009987',
        'pathway': 'PW:0000001',
        'gene_product': 'CHEBI:33695'  # bioinformation molecule
    }

    # predicates used to relate genes, gene products and pathways
    object_properties = {
        'involved_in': 'RO:0002331',
        'gene_product_of': 'RO:0002204',
        'has_gene_product': 'RO:0002205'
    }

    properties = object_properties.copy()

    def __init__(self, graph):
        """
        Keep the graph and build a ``Model`` on top of it.

        :param graph: must be a ``Graph`` instance
        :raises ValueError: when ``graph`` is not a ``Graph``
        """
        if not isinstance(graph, Graph):
            # the message has to be built with str.format();
            # attribute access on the literal raised AttributeError instead
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)

    def addPathway(self,
                   pathway_id,
                   pathway_label,
                   pathway_type=None,
                   pathway_description=None):
        """
        Adds a pathway as a class.  If no specific type is specified, it will
        default to a subclass of "GO:cellular_process" and "PW:pathway".

        :param pathway_id:
        :param pathway_label:
        :param pathway_type: class type; defaults to
            ``pathway_parts['cellular_process']`` when None
        :param pathway_description:
        :return: None
        """
        if pathway_type is None:
            pathway_type = self.pathway_parts['cellular_process']
        self.model.addClassToGraph(pathway_id, pathway_label, pathway_type,
                                   pathway_description)
        self.model.addSubClass(pathway_id, self.pathway_parts['pathway'])

    def addGeneToPathway(self, gene_id, pathway_id):
        """
        When adding a gene to a pathway, we create an intermediate
        'gene product' that is involved in
        the pathway, through a blank node.

        gene_id RO:has_gene_product _gene_product
        _gene_product RO:involved_in pathway_id

        The blank node id is '_:' followed by the gene id with its colons
        removed, followed by 'product'.

        :param gene_id:
        :param pathway_id:
        :return: None
        """
        gene_product = '_:' + gene_id.replace(':', '') + 'product'
        self.model.addIndividualToGraph(gene_product, None,
                                        self.pathway_parts['gene_product'])
        self.graph.addTriple(gene_id,
                             self.object_properties['has_gene_product'],
                             gene_product)
        self.addComponentToPathway(gene_product, pathway_id)

    def addComponentToPathway(self, component_id, pathway_id):
        """
        This can be used directly when the component is directly involved in
        the pathway.  If a transforming event is performed on the component
        first, then the addGeneToPathway should be used instead.

        :param component_id:
        :param pathway_id:
        :return: None
        """
        self.graph.addTriple(component_id,
                             self.object_properties['involved_in'], pathway_id)
Beispiel #8
0
    def _get_equivids(self, limit):
        """
        Read the GeneReviews-to-OMIM id map and write it to the graph.

        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Each row maps one GeneReviews (NBK) id to one OMIM id; the
        relationship is 1:many and some of the OMIM ids are genes rather
        than diseases, so the coupling is kept loose: the NBKs are taken
        to be higher-level grouping classes and the OMIM ids their
        subclasses.  Which OMIM ids count as phenotypes is decided by
        ``omim.process_entries`` with ``filter_keep_phenotype_entry_ids``.

        :param limit: stop reading once the row count exceeds this value
            (ignored in test mode or when None); also passed on to
            ``omim.process_entries``
        :return: None
        """
        raw = '/'.join((self.rawdir, self.files['idmap']['file']))
        model = Model(self.graph)

        # we look some stuff up in OMIM, so initialize here
        omim = OMIM(self.graph_type, self.are_bnodes_skized)
        id_map = {}
        allomimids = set()
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for line_counter, row in enumerate(filereader, start=1):
                if line_counter == 1:  # skip header
                    continue
                nbk_num, shortname, omim_num = row
                gr_id = 'GeneReviews:' + nbk_num
                omim_id = 'OMIM:' + omim_num
                # in test mode keep only the rows for the test ids
                if self.testMode and not (
                        len(self.test_ids) > 0 and omim_id in self.test_ids):
                    continue

                # sometimes there's bad omim nums
                if len(omim_num) > 6:
                    logger.warning(
                        "OMIM number incorrectly formatted in row %d; skipping:\n%s",
                        line_counter, '\t'.join(row))
                    continue

                # build up a hashmap of the mappings; then process later
                id_map.setdefault(nbk_num, set()).add(omim_num)

                # add the class along with the shortname
                model.addClassToGraph(gr_id, None)
                model.addSynonym(gr_id, shortname)

                allomimids.add(omim_num)

                past_limit = limit is not None and line_counter > limit
                if not self.testMode and past_limit:
                    break

        # get the omim ids that are not genes
        entries_that_are_phenotypes = omim.process_entries(
            list(allomimids), filter_keep_phenotype_entry_ids,
            None, None, limit)

        total = len(allomimids)
        logger.info("Filtered out %d/%d entries that are genes or features",
                    total - len(entries_that_are_phenotypes),
                    total)

        for nbk_num in self.book_ids:
            gr_id = 'GeneReviews:' + nbk_num
            if nbk_num in id_map:
                for omim_num in id_map[nbk_num]:
                    omim_id = 'OMIM:' + omim_num
                    # add the gene reviews as a superclass to the omim id,
                    # but only if the omim id is not a gene
                    if omim_id not in entries_that_are_phenotypes:
                        continue
                    model.addClassToGraph(omim_id, None)
                    model.addSubClass(omim_id, gr_id)
            # add this as a generic subclass of DOID:4
            model.addSubClass(gr_id, 'DOID:4')
Beispiel #9
0
class Pathway():
    """
    Convenience helpers for placing genes and gene products in pathways.
    """

    def __init__(self, graph):
        """
        Keep the graph and the lookup tables it carries.

        :param graph: must be a ``Graph`` instance
        :raises ValueError: when ``graph`` is not a ``Graph``
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map

    def addPathway(
            self, pathway_id, pathway_label, pathway_type=None,
            pathway_description=None):
        """
        Add a pathway as a class and make it a subclass of the term stored
        under ``globaltt['pathway']``.

        :param pathway_id:
        :param pathway_label:
        :param pathway_type: class type; when None the term stored under
            ``globaltt['cellular_process']`` is used
        :param pathway_description:
        :return: None
        """
        class_type = (
            self.globaltt['cellular_process']
            if pathway_type is None else pathway_type)
        self.model.addClassToGraph(
            pathway_id, pathway_label, class_type, pathway_description)
        self.model.addSubClass(pathway_id, self.globaltt['pathway'])

    def addGeneToPathway(self, gene_id, pathway_id):
        """
        Put a gene in a pathway by way of an intermediate blank node
        standing for its gene product.

        gene_id RO:has_gene_product _gene_product
        _gene_product RO:involved_in pathway_id

        The blank node id is '_:' followed by the gene id with its colons
        removed, followed by 'product'.

        :param gene_id:
        :param pathway_id:
        :return: None
        """
        product_bnode = '_:{}product'.format(gene_id.replace(':', ''))
        product_type = self.globaltt['gene_product']
        self.model.addIndividualToGraph(product_bnode, None, product_type)
        self.graph.addTriple(
            gene_id, self.globaltt['has gene product'], product_bnode)
        self.addComponentToPathway(product_bnode, pathway_id)

    def addComponentToPathway(self, component_id, pathway_id):
        """
        State that a component is involved in a pathway.

        Use this directly when the component itself takes part in the
        pathway.  If a transforming event is performed on the component
        first, then addGeneToPathway should be used instead.

        :param component_id:
        :param pathway_id:
        :return: None
        """
        predicate = self.globaltt['involved in']
        self.graph.addTriple(component_id, predicate, pathway_id)
Beispiel #10
0
    def _get_equivids(self, limit):
        """
        Map GeneReviews books (NBK ids) to the OMIM phenotypes they cover.

        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Where each of the rows represents a mapping between
        a gr id and an omim id. These are a 1:many relationship,
        and some of the omim ids are genes (not diseases).
        Therefore, we need to create a loose coupling here.
        We make the assumption that these NBKs are generally higher-level
        grouping classes; therefore the OMIM ids are treated as subclasses,
        but only those OMIM numbers which ``self.omim_type`` types as
        phenotypes.  Numbers listed in ``self.omim_replaced`` are swapped
        for their replacements and obsolete numbers are dropped first.

        :param limit: stop reading once the reader's line number exceeds
            this value (ignored in test mode or when None)
        :return: None
        """
        raw = '/'.join((self.rawdir, self.files['idmap']['file']))
        model = Model(self.graph)
        LOG.info('Looping over %s', raw)
        id_map = {}
        allomimids = set()
        col = ['NBK_id', 'GR_shortname', 'OMIM']

        with open(raw, 'r', encoding="utf8") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            row = next(reader)
            # drop the first character of the header row
            # (the '#' in the documented file format)
            row[0] = row[0][1:]
            if not self.check_fileheader(col, row):
                exit(-1)

            # the reader is bound to `reader`; looping over any other
            # name raises NameError before a single row is read
            for row in reader:
                nbk_num = row[col.index('NBK_id')]
                shortname = row[col.index('GR_shortname')]
                # clean the number before it is used to build the curie,
                # so the test-mode filter sees the same id as everything else
                omim_num = row[col.index('OMIM')].strip()
                gr_id = 'GeneReviews:' + nbk_num
                omim_id = 'OMIM:' + omim_num
                # in test mode only rows for the test ids are kept
                if self.test_mode and omim_id not in self.test_ids:
                    continue

                # sometimes there's bad omim nums
                if len(omim_num) != 6:
                    LOG.warning(
                        "OMIM number incorrectly formatted in row %i; skipping:\n%s",
                        reader.line_num, '\t'.join(row))
                    continue

                # build up a hashmap of the mappings; then process later
                id_map.setdefault(nbk_num, set()).add(omim_num)

                # add the class along with the shortname
                model.addClassToGraph(gr_id, None)
                model.addSynonym(gr_id, shortname)

                allomimids.add(omim_num)

                if not self.test_mode and limit is not None \
                        and reader.line_num > limit:
                    break

        # given all_omim_ids from GR,
        # we want to update any which are changed or removed
        # before deciding which are disease / phenotypes
        replaced = allomimids & self.omim_replaced.keys()
        if replaced:
            LOG.warning(
                "These OMIM ID's are past their pull date: %s", str(replaced))
            for oid in replaced:
                allomimids.remove(oid)
                for rep in self.omim_replaced[oid]:
                    # a replacement given as a plain string must be added
                    # whole; set.update() would split it into characters
                    if isinstance(rep, str):
                        allomimids.add(rep)
                    else:
                        allomimids.update(rep)

        # guard against omim identifiers which have been removed
        obsolete = {
            o for o in self.omim_type
            if self.omim_type[o] == self.globaltt['obsolete']}
        removed = allomimids & obsolete
        if removed:
            LOG.warning("These OMIM ID's are gone: %s", str(removed))
            allomimids -= removed

        # filter for disease /phenotype types (we can argue about what is included)
        phenotype_types = (
            self.globaltt['Phenotype'],
            self.globaltt['has_affected_feature'],  # both a gene and a phenotype
            self.globaltt['heritable_phenotypic_marker'])  # probable phenotype
        omim_phenotypes = {
            omim for omim in self.omim_type
            if self.omim_type[omim] in phenotype_types}
        LOG.info(
            "Have %i omim_ids globally typed as phenotypes from OMIM",
            len(omim_phenotypes))

        entries_that_are_phenotypes = allomimids & omim_phenotypes
        LOG.info(
            "Filtered out %d/%d entries that are genes or features",
            len(allomimids - entries_that_are_phenotypes), len(allomimids))

        for nbk_num in self.book_ids:
            gr_id = 'GeneReviews:' + nbk_num
            for omim_num in id_map.get(nbk_num, ()):
                # add the gene reviews as a superclass to the omim id,
                # but only if the omim id is not a gene.
                # entries_that_are_phenotypes holds bare OMIM numbers,
                # so the number is tested, not the prefixed curie
                if omim_num in entries_that_are_phenotypes:
                    omim_id = 'OMIM:' + omim_num
                    model.addClassToGraph(omim_id, None)
                    model.addSubClass(omim_id, gr_id)
            # add this as a generic subclass  -- TEC: this is the job of inference
            model.addSubClass(gr_id, self.globaltt['disease'])
Beispiel #11
0
class Pathway():
    """
    This provides convenience methods to deal with gene and protein collections
    in the context of pathways.

    All triples are written to the graph supplied at construction time,
    either directly through ``graph.addTriple`` or through a ``Model``
    wrapped around that same graph.
    """

    # class identifiers used as types/superclasses for pathway-related nodes
    pathway_parts = {
        'signal_transduction': 'GO:0007165',
        'cellular_process': 'GO:0009987',
        'pathway': 'PW:0000001',
        'gene_product': 'CHEBI:33695'  # bioinformation molecule
    }

    # predicates used to connect genes, gene products and pathways
    object_properties = {
        'involved_in': 'RO:0002331',
        'gene_product_of': 'RO:0002204',
        'has_gene_product': 'RO:0002205'
    }

    # independent copy, so changes to one mapping do not leak into the other
    properties = object_properties.copy()

    def __init__(self, graph):
        """
        :param graph: the Graph instance that triples will be added to
        :raises ValueError: if ``graph`` is not a Graph instance
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)

    def addPathway(
            self, pathway_id, pathway_label, pathway_type=None,
            pathway_description=None):
        """
        Adds a pathway as a class.  If no specific type is specified, it will
        default to "GO:cellular_process"; in every case the pathway is also
        made a subclass of "PW:pathway".
        :param pathway_id: identifier of the pathway class
        :param pathway_label: label passed on to the class
        :param pathway_type: optional type identifier for the class
        :param pathway_description: optional description passed on to the class
        :return: None
        """

        if pathway_type is None:
            pathway_type = self.pathway_parts['cellular_process']
        self.model.addClassToGraph(
            pathway_id, pathway_label, pathway_type, pathway_description)
        self.model.addSubClass(pathway_id, self.pathway_parts['pathway'])

    def addGeneToPathway(self, gene_id, pathway_id):
        """
        When adding a gene to a pathway, we create an intermediate
        'gene product' that is involved in
        the pathway, through a blank node.

        gene_id RO:has_gene_product _gene_product
        _gene_product RO:involved_in pathway_id

        :param gene_id: identifier of the gene; its colons are stripped to
            build the blank node identifier of the gene product
        :param pathway_id: identifier of the pathway
        :return: None
        """

        # e.g. 'NCBIGene:123' -> '_:NCBIGene123product'
        gene_product = '_:' + gene_id.replace(':', '') + 'product'
        self.model.addIndividualToGraph(
            gene_product, None, self.pathway_parts['gene_product'])
        self.graph.addTriple(
            gene_id, self.object_properties['has_gene_product'], gene_product)
        self.addComponentToPathway(gene_product, pathway_id)

    def addComponentToPathway(self, component_id, pathway_id):
        """
        This can be used directly when the component is directly involved in
        the pathway.  If a transforming event is performed on the component
        first, then the addGeneToPathway should be used instead.

        :param component_id: identifier of the component (subject)
        :param pathway_id: identifier of the pathway (object)
        :return: None
        """
        self.graph.addTriple(
            component_id, self.object_properties['involved_in'], pathway_id)
Beispiel #12
0
    def _get_equivids(self, limit):
        """
        Read the GeneReviews-to-OMIM id map and add the mappings to the graph.

        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Where each of the rows represents a mapping between
        a gr id and an omim id. These are a 1:many relationship,
        and some of the omim ids are genes(not diseases).
        Therefore, we need to create a loose coupling here.
        We make the assumption that these NBKs are generally higher-level
        grouping classes; therefore the OMIM ids are treated as subclasses.
        (This assumption is poor for those omims that are actually genes,
        but we have no way of knowing what those are here...
        we will just have to deal with that for now.)

        Rows that do not have exactly three columns (including blank lines)
        and rows whose OMIM number is longer than six characters are logged
        and skipped.

        :param limit: when not in test mode, stop reading once the line
            number exceeds this value; None means no limit
        :return: None

        """
        raw = '/'.join((self.rawdir, self.files['idmap']['file']))
        model = Model(self.graph)

        # we look some stuff up in OMIM, so initialize here
        omim = OMIM(self.graph_type, self.are_bnodes_skized)
        id_map = {}
        allomimids = set()
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for line_counter, row in enumerate(filereader, start=1):
                if line_counter == 1:  # skip header
                    continue

                # csv.reader yields [] for blank lines; unpacking such a row
                # would abort the whole load, so skip anything malformed
                if len(row) != 3:
                    logger.warning(
                        "Expected 3 columns but found %d in row %d; "
                        "skipping:\n%s",
                        len(row), line_counter, '\t'.join(row))
                    continue

                nbk_num, shortname, omim_num = row
                gr_id = 'GeneReviews:'+nbk_num
                omim_id = 'OMIM:'+omim_num

                # in test mode only the rows for the test ids are kept
                if self.testMode and omim_id not in self.test_ids:
                    continue

                # sometimes there's bad omim nums
                if len(omim_num) > 6:
                    logger.warning(
                        "OMIM number incorrectly formatted "
                        "in row %d; skipping:\n%s",
                        line_counter, '\t'.join(row))
                    continue

                # build up a hashmap of the mappings; then process later
                id_map.setdefault(nbk_num, set()).add(omim_num)

                # add the class along with the shortname
                model.addClassToGraph(gr_id, None)
                model.addSynonym(gr_id, shortname)

                allomimids.add(omim_num)

                if not self.testMode and \
                        limit is not None and line_counter > limit:
                    break

            # end looping through file

        # get the omim ids that are not genes
        # NOTE(review): process_entries is handed bare OMIM numbers, while the
        # membership test below uses 'OMIM:'-prefixed ids -- confirm that the
        # returned collection holds prefixed ids.
        entries_that_are_phenotypes = \
            omim.process_entries(
                list(allomimids), filter_keep_phenotype_entry_ids,
                None, None, limit)

        logger.info("Filtered out %d/%d entries that are genes or features",
                    len(allomimids)-len(entries_that_are_phenotypes),
                    len(allomimids))

        for nbk_num in self.book_ids:
            gr_id = 'GeneReviews:'+nbk_num
            # books without any mapping in the file contribute no omim links
            for omim_num in id_map.get(nbk_num, ()):
                omim_id = 'OMIM:'+omim_num
                # add the gene reviews as a superclass to the omim id,
                # but only if the omim id is not a gene
                if omim_id in entries_that_are_phenotypes:
                    model.addClassToGraph(omim_id, None)
                    model.addSubClass(omim_id, gr_id)
            # add this as a generic subclass of DOID:4
            model.addSubClass(gr_id, 'DOID:4')