Ejemplo n.º 1
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim with ``map2slim``
        and returns its results. Human UniProtKB subject IDs in the returned
        associations are rewritten to HGNC gene IDs where a mapping is found.

        :param category: object category to summarize over. Gene subjects are
            expanded to their proteins only when this equals
            FUNCTION_CATEGORY; ANATOMY_CATEGORY is forwarded to ``map2slim``
            as 'anatomical entity'.
        :return: the results produced by ``map2slim``, with subject IDs
            rewritten in place as described above.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so remove them
        # from args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim')
        subjects = args.pop('subject')
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # str.replace leaves strings without the prefix untouched, so no
        # membership pre-check is needed.
        subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

        if category == FUNCTION_CATEGORY:
            # get proteins for a gene only when the category is 'function'
            slimmer_subjects = []
            for s in subjects:
                if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                    # Fall back to the gene ID itself when no protein is
                    # returned for it.
                    slimmer_subjects += (
                        sg_dev.gene_to_uniprot_proteins(s) or [s])
                else:
                    slimmer_subjects.append(s)
        else:
            slimmer_subjects = subjects

        if category == ANATOMY_CATEGORY:
            category = 'anatomical entity'

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category=category,
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # checked maps a protein ID to its HGNC gene ID, or to None when the
        # lookup found no HGNC gene. Misses are cached as well so the same
        # protein is not looked up again for every association.
        checked = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if (taxon != 'NCBITaxon:9606'
                        or not protein_id.startswith('UniProtKB:')):
                    continue
                if protein_id not in checked:
                    genes = sg_dev.uniprot_protein_to_genes(protein_id)
                    hgnc_genes = [g for g in genes if g.startswith('HGNC')]
                    # When several HGNC genes map to one protein, the last
                    # one wins (same outcome as assigning each in turn).
                    checked[protein_id] = (
                        hgnc_genes[-1] if hgnc_genes else None)
                gene = checked[protein_id]
                if gene is not None:
                    subject['id'] = gene
        return results
Ejemplo n.º 2
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim with ``map2slim``
        (200 rows, automatic assertions excluded) and returns its results.
        Human UniProtKB subject IDs in the returned associations are rewritten
        to HGNC gene IDs where a mapping is found.

        :param category: object category forwarded to ``map2slim``.
        :return: the results produced by ``map2slim``, with subject IDs
            rewritten in place as described above.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so remove them
        # from args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim')
        subjects = args.pop('subject')
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        # One client is shared by the gene->protein and protein->gene lookups
        # instead of building a new one for each lookup.
        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # Every subject is normalised and expanded, not just the first one,
        # so a request with several subjects no longer loses the others.
        prots = []
        for s in subjects:
            s = s.replace('WormBase:', 'WB:', 1)
            if s.startswith(('HGNC', 'NCBIGene', 'ENSEMBL:')):
                # Fall back to the gene ID itself when no protein is
                # returned for it.
                prots += sg_dev.gene_to_uniprot_proteins(s) or [s]
            else:
                prots.append(s)

        results = map2slim(subjects=prots,
                           slim=slim,
                           rows=200,
                           exclude_automatic_assertions=True,
                           object_category=category,
                           **args)
        # To the fullest extent possible return HGNC ids
        # checked maps a protein ID to its HGNC gene ID, or to None when the
        # lookup found no HGNC gene. Misses are cached as well so the same
        # protein is not looked up again for every association.
        checked = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if (taxon != 'NCBITaxon:9606'
                        or not protein_id.startswith('UniProtKB:')):
                    continue
                if protein_id not in checked:
                    genes = sg_dev.uniprot_protein_to_genes(protein_id)
                    hgnc_genes = [g for g in genes if g.startswith('HGNC')]
                    # When several HGNC genes map to one protein, the last
                    # one wins (same outcome as assigning each in turn).
                    checked[protein_id] = (
                        hgnc_genes[-1] if hgnc_genes else None)
                gene = checked[protein_id]
                if gene is not None:
                    subject['id'] = gene
        return results
Ejemplo n.º 3
0
    def get(self):
        """
        For a given gene(s), summarize its annotations over a defined set of slim

        Gene subjects (HGNC, NCBIGene, ENSEMBL) are converted to proteins
        before calling ``map2slim`` with object category 'function'. Human
        UniProtKB subject IDs in the returned associations are rewritten to
        HGNC gene IDs where a mapping is found.

        :return: the results produced by ``map2slim``, with subject IDs
            rewritten in place as described above.
        """
        args = self.function_parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so remove them
        # from args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim')
        subjects = args.pop('subject')

        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        # str.replace leaves strings without the prefix untouched, so no
        # membership pre-check is needed.
        subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

        slimmer_subjects = []
        for s in subjects:
            if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                # Fall back to the gene ID itself when no protein is
                # returned for it.
                slimmer_subjects += (
                    identifier_converter.convert_gene_to_protein(s) or [s])
            else:
                slimmer_subjects.append(s)

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category='function',
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # checked maps a protein ID to its HGNC gene ID, or to None when the
        # conversion found no HGNC gene. Misses are cached as well so the same
        # protein is not converted again for every association.
        checked = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if (taxon != 'NCBITaxon:9606'
                        or not protein_id.startswith('UniProtKB:')):
                    continue
                if protein_id not in checked:
                    genes = identifier_converter.convert_protein_to_gene(
                        protein_id)
                    hgnc_genes = [g for g in genes if g.startswith('HGNC')]
                    # When several HGNC genes map to one protein, the last
                    # one wins (same outcome as assigning each in turn).
                    checked[protein_id] = (
                        hgnc_genes[-1] if hgnc_genes else None)
                gene = checked[protein_id]
                if gene is not None:
                    subject['id'] = gene

        return results
Ejemplo n.º 4
0
def test_map2slim_golr():
    """Check that map2slim returns at least one result for SUBJECTS and SLIM.

    Each result is printed with its subject, slim term and association count.
    """
    slim_results = map2slim(
        subjects=SUBJECTS,
        slim=SLIM,
        object_category='function',
    )

    assert len(slim_results) > 0

    # enumerate(start=1) leaves `seen` holding the number of results visited.
    seen = 0
    for seen, entry in enumerate(slim_results, start=1):
        summary = "Subject: {} Slim:{} Assocs:{}".format(
            entry['subject'], entry['slim'], len(entry['assocs']))
        print(summary)
    assert seen > 0
Ejemplo n.º 5
0
 def get(self, category):
     """
     Summarize a set of objects

     Runs ``map2slim`` for the requested subjects. If that yields no
     associations at all and exactly one subject was given, the subject is
     mapped to UniProt proteins and the query is repeated with those.
     """
     args = parser.parse_args()
     logging.info("category is {}".format(category))
     # 'slim' and 'subject' are passed explicitly to map2slim, so take them
     # out of args before the remainder is forwarded as keyword arguments.
     slim = args.pop('slim')
     subjects = args.pop('subject')

     def summarize(ids):
         # Both the initial query and the retry use identical settings.
         return map2slim(subjects=ids,
                         slim=slim,
                         rows=200,
                         object_category=category,
                         **args)

     results = summarize(subjects)

     # If there are no associations for the given ID, try other IDs.
     # Note the AmiGO instance does *not* support equivalent IDs
     total_assocs = sum(len(entry['assocs']) for entry in results)
     if total_assocs != 0 or len(subjects) != 1:
         return results

     # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
     # https://github.com/monarch-initiative/dipper/issues/461
     # nota bene:
     # currently incomplete because code is not checking for the possibility of >1 subjects
     only_subject = subjects[0]
     logging.info(
         "Found no associations using {} - will try mapping to other IDs"
         .format(only_subject))
     scigraph = SciGraph(
         url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
     )
     proteins = scigraph.gene_to_uniprot_proteins(only_subject)
     if len(proteins) > 0:
         results = summarize(proteins)
     return results
Ejemplo n.º 6
0
    def get(self):
        """
        For a given gene(s), summarize its annotations over a defined set of slim

        Subjects are passed to ``map2slim`` with object category
        'anatomical entity' after rewriting the 'WormBase:' prefix to 'WB:'.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed explicitly below, so take them out
        # of args before the remainder is forwarded as keyword arguments.
        slim = args.pop('slim')
        raw_subjects = args.pop('subject')

        normalized = []
        for curie in raw_subjects:
            # replace() returns the string unchanged when the prefix is absent
            normalized.append(curie.replace('WormBase:', 'WB:'))

        return map2slim(subjects=normalized,
                        slim=slim,
                        object_category='anatomical entity',
                        user_agent=USER_AGENT,
                        **args)