Example #1
0
 def __init__(self):
     """Set up the BioLink wrapper, the gene client and the default metadata."""
     # Resolve the configured BioLink endpoint first, then wrap it.
     endpoint = Config().get_biolink_api_endpoint()
     self.blw = BioLinkApiWrapper(endpoint)
     # Client obtained via get_client('gene')
     # (presumably a MyGene.info gene client - confirm against the import).
     self.mg = get_client('gene')
     # Default metadata for this module: human taxon, no result limit.
     self.meta = {'taxon': 'human', 'limit': None}
class GeneInteractions:
    """Collect gene-gene interactions for an input gene set via the BioLink API."""

    # Column layout of the frame returned by get_interactions(); also used to
    # build a well-formed empty result when no associations are found.
    _RESULT_COLUMNS = ['input_id', 'input_symbol', 'hit_symbol', 'hit_id', 'score']

    def __init__(self):
        """Create the BioLink API wrapper from the configured endpoint."""
        self.blw = BioLinkApiWrapper(Config().get_biolink_api_endpoint())

    @staticmethod
    # RMB: July 5, 2019 - gene_records is a Pandas DataFrame
    def load_gene_set(gene_records):
        """Convert a gene DataFrame into a list of annotated input-gene dicts.

        Args:
            gene_records: pandas DataFrame providing ``hit_id`` and
                ``hit_symbol`` columns.

        Returns:
            A list with one dict per row, holding ``input_id`` and
            ``sim_input_curie`` (both the row's ``hit_id``) and
            ``input_symbol`` (the row's ``hit_symbol``, or the symbol
            resolved from ``hit_id`` when the row's symbol is falsy).
        """
        annotated_gene_set = []
        for gene in gene_records.to_dict(orient='records'):
            symbol = gene['hit_symbol']
            # NOTE(review): a NaN symbol is truthy, so it is NOT resolved
            # here - confirm callers supply None/'' for missing symbols.
            if not symbol:
                symbol = Resolver.get_the_resolver().translate_one(
                    source=gene['hit_id'], identifier_range=SYMBOL)

            annotated_gene_set.append({
                'input_id': gene['hit_id'],
                'sim_input_curie': gene['hit_id'],
                'input_symbol': symbol,
            })
        return annotated_gene_set

    def get_interactions(self, input_gene_set, threshold):
        """Return interaction partners shared by enough input genes.

        Args:
            input_gene_set: pandas DataFrame accepted by ``load_gene_set``.
            threshold: value convertible with ``int()``; a hit symbol is kept
                only when it occurs strictly more than this many times
                across all collected interactions.

        Returns:
            A pandas DataFrame with the columns ``input_id``,
            ``input_symbol``, ``hit_symbol``, ``hit_id`` and ``score``.
            Rows whose hit symbol equals the input symbol are removed. The
            frame is empty (but still carries these columns) when no
            interactions were found at all.
        """
        annotated_input_gene_set = self.load_gene_set(input_gene_set)
        lower_bound = int(threshold)

        results = []
        for gene in annotated_input_gene_set:
            interactions = self.blw.gene_interactions(
                gene_curie=gene['sim_input_curie'])
            for assoc in interactions['associations']:
                interaction = self.blw.parse_association(
                    input_id=gene['sim_input_curie'],
                    input_label=gene['input_symbol'],
                    association=assoc,
                )
                results.append({
                    'input_id': interaction['input_id'],
                    'input_symbol': interaction['input_symbol'],
                    'hit_symbol': interaction['hit_symbol'],
                    'hit_id': interaction['hit_id'],
                    'score': 1,  # CX: changed score from 0 to 1
                })

        # Without any association a frame built from the records would have
        # no columns and the 'hit_symbol' lookup below would raise KeyError;
        # hand back an empty frame with the usual layout instead.
        if not results:
            return pd.DataFrame(columns=self._RESULT_COLUMNS)

        # Process the results: keep only hits seen more often than the bound.
        results = pd.DataFrame.from_records(results)
        counts = results['hit_symbol'].value_counts()
        high_counts = counts[counts > lower_bound].index.tolist()
        results = results[results['hit_symbol'].isin(high_counts)]

        # CX: remove results where input gene = output gene. Output gene can still be disease associated genes.
        results = results[~(results['hit_symbol'] == results['input_symbol'])]

        return results
Example #3
0
class LookUp(object):
    """Look up the genes associated with a disease through the BioLink API."""

    def __init__(self):
        """Set up the BioLink wrapper, the gene client and the default metadata."""
        endpoint = Config().get_biolink_api_endpoint()
        self.blw = BioLinkApiWrapper(endpoint)
        # Client obtained via get_client('gene')
        # (presumably a MyGene.info gene client - confirm against the import).
        self.mg = get_client('gene')
        self.meta = {'taxon': 'human', 'limit': None}

    def metadata(self):
        """Print the module banner followed by the metadata dictionary."""
        print("Mod O DiseaseGeneLookup metadata:")
        pprint(self.meta)

    def disease_geneset_lookup(self,
                               disease_identifier,
                               disease_label,
                               query_biolink=True):
        """Build a DataFrame of the genes associated with one disease.

        Args:
            disease_identifier: identifier passed to ``blw.disease2genes`` and
                recorded on each parsed association.
            disease_label: label recorded on each parsed association.
            query_biolink: when truthy, every hit is also looked up through
                ``self.mg.query`` and tagged with an ``input_ncbi`` entry if
                the query returns at least one hit.

        Returns:
            An empty DataFrame when there are no associations. Otherwise one
            row per unique (input_id, input_symbol, hit_id, hit_symbol,
            relation) combination, with the ``sources`` of duplicate rows
            merged into a single comma-separated string.
        """
        response = self.blw.disease2genes(disease_identifier)

        gene_hits = []
        for association in response['associations']:
            gene_hits.append(
                self.blw.parse_association(disease_identifier, disease_label,
                                           association))

        # The flag does not change per gene, so test it once up front.
        if query_biolink:
            for hit in gene_hits:
                lookup_key = hit['hit_id'].replace('HGNC', 'hgnc')
                reply = self.mg.query(lookup_key,
                                      species='human',
                                      entrezonly=True,
                                      fields='entrez,HGNC,symbol')
                matches = reply['hits']
                if matches:
                    # Only the first match is used for the NCBI identifier.
                    hit.update({
                        'input_ncbi': 'NCBIGene:{}'.format(matches[0]['_id'])
                    })

        gene_frame = pd.DataFrame(data=gene_hits)
        if gene_frame.empty:
            return gene_frame

        # Flatten each row's source collection to text, then merge the
        # sources of rows that share the same identifying columns.
        group_keys = [
            'input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation'
        ]
        gene_frame['sources'] = gene_frame['sources'].str.join(', ')
        merged_sources = gene_frame.groupby(group_keys)['sources'].apply(
            ', '.join)
        return merged_sources.reset_index()
 def __init__(self):
     self.blw = BioLinkApiWrapper(Config().get_biolink_api_endpoint())