예제 #1
0
class LookUp(object):
    """Look up the genes associated with a disease through the Biolink wrapper.

    ``__init__`` sets ``blw`` (a ``BioLinkWrapper``), ``mg`` (the client
    returned by ``get_client('gene')``), ``input_object`` and the static
    ``meta`` dictionary describing this module.
    """

    # Keys used to collapse duplicate associations in disease_geneset_lookup;
    # the returned frame holds these columns followed by 'sources'.
    _GROUP_COLUMNS = ('input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation')

    def __init__(self):
        self.blw = BioLinkWrapper(Config().get_biolink_api_endpoint())
        self.mg = get_client('gene')
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }

    def metadata(self):
        """Print the module banner followed by the ``meta`` dictionary."""
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    def echo_input_object(self, output=None):
        """Dump ``self.input_object`` to ``output``, or to stdout when ``output`` is falsy."""
        target = output if output else stdout
        dump(self.input_object, target, indent=4, separators=(',', ': '))

    def disease_geneset_lookup(self, name, id):
        """Return the genes associated with a disease as a DataFrame.

        Args:
            name: Disease label, forwarded to ``parse_association``.
            id: Disease identifier, forwarded to ``disease2genes`` and
                ``parse_association``. (The name shadows the builtin but is
                kept so existing keyword callers keep working.)

        Returns:
            A DataFrame with the columns in ``_GROUP_COLUMNS`` plus
            ``'sources'``; rows sharing the same group columns are merged and
            their sources joined with ``', '``. An empty frame with those
            columns is returned when the disease has no associations.
        """
        input_disease_label = name
        input_disease_id = id
        associations = self.blw.disease2genes(input_disease_id)['associations']
        input_gene_set = [
            self.blw.parse_association(input_disease_id, input_disease_label, x)
            for x in associations
        ]
        result_columns = list(self._GROUP_COLUMNS) + ['sources']
        if not input_gene_set:
            # Without rows the frame would have no 'sources' column and the
            # aggregation below would fail with a KeyError.
            return pd.DataFrame(columns=result_columns)

        for input_gene in input_gene_set:
            igene_mg = self.mg.query(input_gene['hit_id'].replace('HGNC', 'hgnc'), species='human', entrezonly=True,
                                     fields='entrez,HGNC,symbol')
            # A query may come back without hits; record None rather than
            # failing the whole lookup on an IndexError.
            hits = igene_mg.get('hits') or []
            input_gene['input_ncbi'] = 'NCBIGene:{}'.format(hits[0]['_id']) if hits else None

        input_genes_df = pd.DataFrame(data=input_gene_set)
        # Flatten each row's list of sources, then group duplicate ids and
        # gather their sources. Only the group columns and 'sources' survive
        # the aggregation.
        input_genes_df['sources'] = input_genes_df['sources'].str.join(', ')
        input_genes_df = input_genes_df.groupby(
            list(self._GROUP_COLUMNS))['sources'].apply(', '.join).reset_index()
        return input_genes_df
예제 #2
0
class GeneInteractions(object):
    """Collect gene-to-gene interactions through a ``BioLinkWrapper``."""

    def __init__(self):
        self.blw = BioLinkWrapper()
        # Input and output are described with the same three fields but are
        # kept as separate dictionaries.
        input_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
        output_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
        self.meta = dict(
            input_type=input_spec,
            output_type=output_spec,
            source='Monarch Biolink',
            predicate=['blm:interacts with'],
        )

    def metadata(self):
        """Print the module banner, then pretty-print ``self.meta``."""
        print('Mod1E Interaction Network metadata:')
        pprint(self.meta)

    @staticmethod
    def load_gene_set(input_gene_set):
        """Turn the records of ``input_gene_set.get_input_curie_set()`` into query records.

        Each returned dictionary carries the record's ``hit_id`` as both
        ``input_id`` and ``sim_input_curie`` and its ``hit_symbol`` as
        ``input_symbol``.
        """
        return [
            {
                'input_id': record['hit_id'],
                'sim_input_curie': record['hit_id'],
                'input_symbol': record['hit_symbol'],
            }
            for record in input_gene_set.get_input_curie_set()
        ]

    def get_interactions(self, annotated_gene_set):
        """Return one result dictionary per interaction of every annotated gene.

        Every result has the keys ``input_id``, ``input_symbol``,
        ``hit_symbol``, ``hit_id`` and a constant ``score`` of 0.
        """
        hits = []
        for entry in annotated_gene_set:
            response = self.blw.gene_interactions(gene_curie=entry['sim_input_curie'])
            for association in response['associations']:
                parsed = self.blw.parse_association(
                    input_id=entry['sim_input_curie'],
                    input_label=entry['input_symbol'],
                    association=association,
                )
                hits.append(dict(
                    input_id=parsed['input_id'],
                    input_symbol=parsed['input_symbol'],
                    hit_symbol=parsed['hit_symbol'],
                    hit_id=parsed['hit_id'],
                    score=0,
                ))
        return hits
예제 #3
0
class LookUp(object):
    """Look up the genes associated with a previously loaded disease.

    Constructing an instance prints the module metadata. ``self.mg`` is
    created in ``__init__`` but is not used by any method of this class.
    """

    # Keys used to collapse duplicate associations in disease_geneset_lookup;
    # the returned frame holds these columns followed by 'sources'.
    _GROUP_COLUMNS = ('input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation')

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.mg = MyGeneInfo()
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    def load_input_object(self, input_object):
        """Fetch the object named by ``input_object['input']`` and keep its summary.

        Stores the ``id``, ``label`` and ``description`` of the object
        returned by ``self.blw.get_obj`` in ``self.input_object`` and
        pretty-prints the stored dictionary.
        """
        input_object = self.blw.get_obj(obj_id=input_object['input'])
        self.input_object = {
            'id': input_object['id'],
            'label': input_object['label'],
            'description': input_object['description'],
        }
        pprint(self.input_object)

    def disease_geneset_lookup(self):
        """Return the genes associated with the loaded disease as a DataFrame.

        Reads ``id`` and ``label`` from ``self.input_object``, so
        ``load_input_object`` must have been called first.

        Returns:
            A DataFrame with the columns in ``_GROUP_COLUMNS`` plus
            ``'sources'``; rows sharing the same group columns are merged and
            their sources joined with ``', '``. An empty frame with those
            columns is returned when the disease has no associations.
        """
        input_disease_id = self.input_object['id']
        input_disease_label = self.input_object['label']
        associations = self.blw.disease2genes(input_disease_id)['associations']
        input_gene_set = [
            self.blw.parse_association(input_disease_id, input_disease_label, x)
            for x in associations
        ]
        if not input_gene_set:
            # Without rows the frame would have no 'sources' column and the
            # aggregation below would fail with a KeyError.
            return pd.DataFrame(columns=list(self._GROUP_COLUMNS) + ['sources'])

        input_genes_df = pd.DataFrame(data=input_gene_set)
        # Flatten each row's list of sources, then group duplicate ids and
        # gather their sources.
        input_genes_df['sources'] = input_genes_df['sources'].str.join(', ')
        input_genes_df = input_genes_df.groupby(
            list(self._GROUP_COLUMNS))['sources'].apply(', '.join).reset_index()
        return input_genes_df
class GeneInteractions(object):
    """Collect gene-to-gene interactions for a loaded set of genes."""

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.gene_set = []
        self.input_object = ''
        # Input and output are described with the same three fields but are
        # kept as separate dictionaries.
        input_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
        output_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
        self.meta = dict(
            input_type=input_spec,
            output_type=output_spec,
            source='Monarch Biolink',
            predicate=['blm:interacts with'],
        )

    def load_input_object(self, input_object):
        """Remember ``input_object`` for a later ``load_gene_set`` call."""
        self.input_object = input_object

    def load_gene_set(self):
        """Append one query record per entry of ``self.input_object['input']``.

        Records are added to the existing ``self.gene_set`` list; nothing is
        cleared first, so repeated calls accumulate.
        """
        self.gene_set.extend(
            {
                'input_id': record['hit_id'],
                'sim_input_curie': record['hit_id'],
                'input_symbol': record['hit_symbol'],
            }
            for record in self.input_object['input']
        )

    def get_interactions(self):
        """Return one result dictionary per interaction of every gene in ``self.gene_set``.

        Every result has the keys ``input_id``, ``input_symbol``,
        ``hit_symbol``, ``hit_id`` and a constant ``score`` of 0.
        """
        hits = []
        for entry in self.gene_set:
            response = self.blw.gene_interactions(gene_curie=entry['sim_input_curie'])
            for association in response['associations']:
                parsed = self.blw.parse_association(
                    input_id=entry['sim_input_curie'],
                    input_label=entry['input_symbol'],
                    association=association,
                )
                hits.append(dict(
                    input_id=parsed['input_id'],
                    input_symbol=parsed['input_symbol'],
                    hit_symbol=parsed['hit_symbol'],
                    hit_id=parsed['hit_id'],
                    score=0,
                ))
        return hits
예제 #5
0
 def __init__(self):
     """Create the Biolink wrapper for the configured endpoint and the module metadata."""
     endpoint = Config().getBiolinkApiEndpoint()
     self.blw = BioLinkWrapper(endpoint)
     # Input and output are described with the same three fields but are
     # kept as separate dictionaries.
     input_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     output_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     self.meta = dict(
         input_type=input_spec,
         output_type=output_spec,
         source='Monarch Biolink',
         predicate=['blm:interacts with'],
     )
 def __init__(self):
     """Create the Biolink wrapper, empty working state and the module metadata."""
     self.blw = BioLinkWrapper()
     self.gene_set = []
     self.input_object = ''
     # Input and output are described with the same three fields but are
     # kept as separate dictionaries.
     input_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     output_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     self.meta = dict(
         input_type=input_spec,
         output_type=output_spec,
         source='Monarch Biolink',
         predicate=['blm:interacts with'],
     )
예제 #7
0
 def __init__(self):
     """Create the Biolink wrapper, the gene client and the disease-lookup metadata."""
     endpoint = Config().get_biolink_api_endpoint()
     self.blw = BioLinkWrapper(endpoint)
     self.mg = get_client('gene')
     self.input_object = ''
     # One disease identifier in, a set of HGNC identifiers out.
     input_spec = dict(complexity='single', id_type=['MONDO', 'DO', 'OMIM'])
     output_spec = dict(complexity='set', id_type='HGNC')
     self.meta = dict(
         data_type='disease',
         input_type=input_spec,
         output_type=output_spec,
         taxon='human',
         limit=None,
         source='Monarch Biolink',
         predicate='blm:gene associated with condition',
     )
예제 #8
0
 def __init__(self):
     """Create the Biolink wrapper, a ``MyGeneInfo`` client and the disease-lookup metadata."""
     self.blw = BioLinkWrapper()
     self.mg = MyGeneInfo()
     self.input_object = ''
     # One disease identifier in, a set of HGNC identifiers out.
     input_spec = dict(complexity='single', id_type=['MONDO', 'DO', 'OMIM'])
     output_spec = dict(complexity='set', id_type='HGNC')
     self.meta = dict(
         data_type='disease',
         input_type=input_spec,
         output_type=output_spec,
         taxon='human',
         limit=None,
         source='Monarch Biolink',
         predicate='blm:gene associated with condition',
     )
 def __init__(self):
     """Create the Biolink wrapper and working state, then print the module metadata."""
     self.blw = BioLinkWrapper()
     self.gene_set = []
     self.input_object = ''
     # Input and output are described with the same three fields but are
     # kept as separate dictionaries.
     input_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     output_spec = dict(complexity='set', id_type='HGNC', data_type='gene')
     self.meta = dict(
         input_type=input_spec,
         output_type=output_spec,
         source='Monarch Biolink',
         predicate=['blm:similar_to'],
     )
     print('Mod1B OwlSim Phenotype Similarity metadata:')
     pprint(self.meta)
class OwlsimSimilarity(object):
    """Find genes similar to a loaded gene set via ``BioLinkWrapper.compute_owlsim``.

    Constructing an instance prints the module metadata.
    """

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.gene_set = []
        self.input_object = ''
        self.meta = {
            'input_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'source': 'Monarch Biolink',
            'predicate': ['blm:similar_to']
        }
        print("""Mod1B OwlSim Phenotype Similarity metadata:""")
        pprint(self.meta)

    def load_input_object(self, input_object):
        """Remember ``input_object`` for a later ``load_gene_set`` call."""
        self.input_object = input_object

    def load_gene_set(self):
        """Append one query record per entry of ``self.input_object['input']``.

        Records are added to the existing ``self.gene_set`` list; nothing is
        cleared first, so repeated calls accumulate.
        """
        for gene in self.input_object['input']:
            self.gene_set.append({
                'input_id': gene['hit_id'],
                'sim_input_curie': gene['hit_id'],
                'input_symbol': gene['hit_symbol']
            })

    def calculate_similarity(self):
        """Return the gene-typed matches for every gene in ``self.gene_set``.

        Each result has the keys ``input_id``, ``input_symbol``,
        ``hit_symbol``, ``hit_id`` and ``score``. A gene whose lookup or
        match processing raises is reported on stdout and skipped; results
        already collected for it are kept.
        """
        results = []
        for gene in self.gene_set:
            try:
                owlsim = self.blw.compute_owlsim(gene['sim_input_curie'])
                for match in owlsim['matches']:
                    if match['type'] == 'gene':
                        results.append({
                            'input_id': gene['input_id'],
                            'input_symbol': gene['input_symbol'],
                            'hit_symbol': match['label'],
                            'hit_id': match['id'],
                            'score': match['score'],
                        })
            except Exception as e:
                # Best-effort per gene. Report the gene being queried: the
                # loop variable `match` may be unbound (or left over from a
                # previous gene) when the failure happens before the loop.
                print(gene.get('sim_input_curie'), e)
        return results
예제 #11
0
class GeneInteractions(object):
    """Collect gene-to-gene interactions and keep the frequently hit genes."""

    # Column order of the frame returned by get_interactions; it mirrors the
    # key order of the records built there.
    _RESULT_COLUMNS = ('input_id', 'input_symbol', 'hit_symbol', 'hit_id', 'score')

    def __init__(self):
        self.blw = BioLinkWrapper(Config().get_biolink_api_endpoint())
        self.meta = {
            'input_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },

            'source': 'Monarch Biolink',
            'predicate': ['blm:interacts with']
        }

    def metadata(self):
        """Print the module banner followed by the ``meta`` dictionary."""
        print("""Mod1E Interaction Network metadata:""")
        pprint(self.meta)

    @staticmethod
    def load_gene_set(gene_records):
        """Turn the rows of ``gene_records`` into query records.

        Args:
            gene_records: A pandas DataFrame (per the RMB note of July 5,
                2019) with ``hit_id`` and ``hit_symbol`` columns.

        Returns:
            A list with one dictionary per row, carrying the row's ``hit_id``
            as both ``input_id`` and ``sim_input_curie`` and its
            ``hit_symbol`` as ``input_symbol``.
        """
        return [
            {
                'input_id': gene['hit_id'],
                'sim_input_curie': gene['hit_id'],
                'input_symbol': gene['hit_symbol']
            }
            for gene in gene_records.to_dict(orient='records')
        ]

    def get_interactions(self, input_gene_set, threshold):
        """Return the interactions whose hit gene occurs more than ``threshold`` times.

        Args:
            input_gene_set: DataFrame of genes, as accepted by ``load_gene_set``.
            threshold: Converted with ``int``; a hit symbol is kept only when
                it appears in strictly more interaction rows than this value.

        Returns:
            A DataFrame with the columns in ``_RESULT_COLUMNS``. It is empty
            (but still has those columns) when no interactions were found.
        """
        annotated_input_gene_set = self.load_gene_set(input_gene_set)
        lower_bound = int(threshold)

        records = []
        for gene in annotated_input_gene_set:
            interactions = self.blw.gene_interactions(gene_curie=gene['sim_input_curie'])
            for assoc in interactions['associations']:
                interaction = self.blw.parse_association(
                    input_id=gene['sim_input_curie'],
                    input_label=gene['input_symbol'],
                    association=assoc
                )
                records.append({
                    'input_id': interaction['input_id'],
                    'input_symbol': interaction['input_symbol'],
                    'hit_symbol': interaction['hit_symbol'],
                    'hit_id': interaction['hit_id'],
                    'score': 0,
                })

        if not records:
            # A frame built from no records has no 'hit_symbol' column, so
            # the counting below would fail with a KeyError.
            return pd.DataFrame(columns=list(self._RESULT_COLUMNS))

        # Keep only the rows whose hit symbol is seen often enough.
        results = pd.DataFrame.from_records(records)
        counts = results['hit_symbol'].value_counts()
        high_counts = counts[counts > lower_bound].index.tolist()
        return pd.DataFrame(results[results['hit_symbol'].isin(high_counts)])