Esempio n. 1
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim terms with
        ``map2slim`` and returns those results. Subject IDs of human
        (NCBITaxon:9606) UniProtKB proteins are rewritten to HGNC gene IDs
        whenever SciGraph reports one.

        Args:
            category: object category to summarize. For FUNCTION_CATEGORY the
                gene subjects are first expanded to their UniProt proteins;
                ANATOMY_CATEGORY is forwarded as 'anatomical entity'.

        Returns:
            The results produced by ``map2slim``, with the subject IDs of the
            entries under each result's 'assocs' updated in place.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so take them out
        # of args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim', None)
        subjects = args.pop('subject', None)
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # str.replace leaves IDs without the 'WormBase:' prefix untouched
        subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

        if category == FUNCTION_CATEGORY:
            # get proteins for a gene only when the category is 'function'
            slimmer_subjects = []
            for s in subjects:
                if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                    prots = sg_dev.gene_to_uniprot_proteins(s)
                    if not prots:
                        # no protein known: query with the gene ID itself
                        prots = [s]
                    slimmer_subjects += prots
                else:
                    slimmer_subjects.append(s)
        else:
            slimmer_subjects = subjects

        if category == ANATOMY_CATEGORY:
            category = 'anatomical entity'

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category=category,
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # Maps each protein ID already looked up to the ID to report for it.
        # Proteins without an HGNC gene map to themselves, so they are not
        # queried again for every association that mentions them.
        checked = {}
        for result in results:
            for association in result['assocs']:
                taxon = association['subject']['taxon']['id']
                protein_id = association['subject']['id']
                if taxon != 'NCBITaxon:9606':
                    continue
                if not protein_id.startswith('UniProtKB:'):
                    continue
                if protein_id not in checked:
                    replacement = protein_id
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # when several HGNC genes match, the last one wins
                            replacement = gene
                    checked[protein_id] = replacement
                association['subject']['id'] = checked[protein_id]
        return results
Esempio n. 2
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim terms with
        ``map2slim`` (200 rows, automatic assertions excluded) and returns
        those results. Subject IDs of human (NCBITaxon:9606) UniProtKB
        proteins are rewritten to HGNC gene IDs whenever SciGraph reports one.

        Args:
            category: object category, forwarded to ``map2slim`` as
                ``object_category``.

        Returns:
            The results produced by ``map2slim``, with the subject IDs of the
            entries under each result's 'assocs' updated in place.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so take them out
        # of args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim', None)
        subjects = args.pop('subject', None)
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        # One client serves both the gene -> protein and protein -> gene lookups.
        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # Normalise and expand every subject, not only the first one, so that
        # no requested subject is dropped when an earlier one is a gene.
        prots = []
        for subject in subjects:
            subject = subject.replace('WormBase:', 'WB:', 1)
            if subject.startswith(('HGNC', 'NCBIGene', 'ENSEMBL:')):
                gene_prots = sg_dev.gene_to_uniprot_proteins(subject)
                if gene_prots:
                    prots.extend(gene_prots)
                    continue
            # not a gene, or no protein known: query with the ID itself
            prots.append(subject)

        results = map2slim(subjects=prots,
                           slim=slim,
                           rows=200,
                           exclude_automatic_assertions=True,
                           object_category=category,
                           **args)
        # To the fullest extent possible return HGNC ids
        # Maps each protein ID already looked up to the ID to report for it.
        # Proteins without an HGNC gene map to themselves, so they are not
        # queried again for every association that mentions them.
        checked = {}
        for result in results:
            for association in result['assocs']:
                taxon = association['subject']['taxon']['id']
                protein_id = association['subject']['id']
                if taxon != 'NCBITaxon:9606':
                    continue
                if not protein_id.startswith('UniProtKB:'):
                    continue
                if protein_id not in checked:
                    replacement = protein_id
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # when several HGNC genes match, the last one wins
                            replacement = gene
                    checked[protein_id] = replacement
                association['subject']['id'] = checked[protein_id]
        return results
Esempio n. 3
0
class SciGraphIdentifierConverter(object):
    """
    Identifier converter backed by a SciGraph client.

    The client is created from the 'scigraph_data' URL of the biolink
    configuration and is kept on ``self.scigraph``.
    """
    def __init__(self):
        scigraph_url = get_biolink_config()['scigraph_data']['url']
        self.scigraph = SciGraph(scigraph_url)

    def convert_gene_to_protein(self, identifier):
        """
        Look up a gene ID in SciGraph and return the matching UniProtKB
        protein IDs, exactly as the client reports them.
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Look up a UniProtKB ID in SciGraph and return the matching gene IDs,
        exactly as the client reports them.
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)