class LookUp(object):
    """Mod O: look up the genes associated with a disease.

    Associations are fetched through ``self.blw`` (a ``BioLinkWrapper``) and
    returned as a pandas DataFrame with one row per distinct association.
    """

    # Columns that identify one association; rows agreeing on all of them are
    # merged and their ``sources`` strings concatenated.
    _GROUP_COLUMNS = ['input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation']

    def __init__(self):
        self.blw = BioLinkWrapper(Config().get_biolink_api_endpoint())
        self.mg = get_client('gene')
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }

    def metadata(self):
        """Print a banner line followed by the pretty-printed ``meta`` dict."""
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    #
    # def load_input_object(self, input_object):
    #     input_object = self.blw.get_obj(obj_id=input_object['input'])
    #     self.input_object = {
    #         'id': input_object['id'],
    #         'label': input_object['label'],
    #         'description': input_object['description'],
    #     }
    #
    # def get_input_object_id(self):
    #     return self.input_object['id']

    def echo_input_object(self, output=None):
        """Serialise ``self.input_object`` with ``dump``.

        :param output: writable stream; when falsy, ``stdout`` is used.
        """
        target = output if output else stdout
        dump(self.input_object, target, indent=4, separators=(',', ': '))

    def disease_geneset_lookup(self, name, id):
        """Return the gene associations of one disease as a DataFrame.

        :param name: disease label, passed to ``parse_association``.
        :param id: disease identifier, passed to ``disease2genes`` and
            ``parse_association`` (the name shadows the builtin but is kept
            because callers may pass it by keyword).
        :return: DataFrame with the columns in ``_GROUP_COLUMNS`` plus
            ``sources``; empty (with those columns) when the disease has no
            associations.
        """
        response = self.blw.disease2genes(id)
        gene_records = [
            self.blw.parse_association(id, name, association)
            for association in response['associations']
        ]

        # Without this guard the frame built from an empty list has no
        # 'sources' column and the code below fails with KeyError.
        if not gene_records:
            return pd.DataFrame(columns=self._GROUP_COLUMNS + ['sources'])

        for gene in gene_records:
            mg_result = self.mg.query(
                gene['hit_id'].replace('HGNC', 'hgnc'),
                species='human',
                entrezonly=True,
                fields='entrez,HGNC,symbol',
            )
            # A query with no hits used to raise IndexError; such genes are
            # now left without an 'input_ncbi' entry. The groupby below keeps
            # only the grouping columns and 'sources', so the returned frame
            # is unaffected either way.
            hits = mg_result.get('hits') or []
            if hits:
                gene['input_ncbi'] = 'NCBIGene:{}'.format(hits[0]['_id'])

        genes_df = pd.DataFrame(data=gene_records)
        # Group duplicate ids and gather sources: first flatten each row's
        # list of sources, then join the strings of rows in the same group.
        genes_df['sources'] = genes_df['sources'].str.join(', ')
        genes_df = (
            genes_df.groupby(self._GROUP_COLUMNS)['sources']
            .apply(', '.join)
            .reset_index()
        )
        return genes_df
class GeneInteractions(object):
    """Mod1E: collect interaction partners for a set of genes.

    Interactions are fetched per gene through ``self.blw`` (a
    ``BioLinkWrapper``) and returned as a flat list of dicts.
    """

    def __init__(self):
        self.blw = BioLinkWrapper()
        # Input and output share one description; each gets its own dict.
        hgnc_gene_set = (('complexity', 'set'), ('id_type', 'HGNC'), ('data_type', 'gene'))
        self.meta = dict(
            input_type=dict(hgnc_gene_set),
            output_type=dict(hgnc_gene_set),
            source='Monarch Biolink',
            predicate=['blm:interacts with'],
        )

    def metadata(self):
        """Print a banner line followed by the pretty-printed ``meta`` dict."""
        print('Mod1E Interaction Network metadata:')
        pprint(self.meta)

    @staticmethod
    def load_gene_set(input_gene_set):
        """Turn the records of ``input_gene_set`` into annotated gene dicts.

        :param input_gene_set: object whose ``get_input_curie_set()`` yields
            records with ``hit_id`` and ``hit_symbol`` keys.
        :return: list of dicts with ``input_id``, ``sim_input_curie`` (both
            the record's ``hit_id``) and ``input_symbol``.
        """
        return [
            {
                'input_id': record['hit_id'],
                'sim_input_curie': record['hit_id'],
                'input_symbol': record['hit_symbol'],
            }
            for record in input_gene_set.get_input_curie_set()
        ]

    def get_interactions(self, annotated_gene_set):
        """Return one dict per interaction of every gene in the set.

        :param annotated_gene_set: iterable of dicts as produced by
            ``load_gene_set``.
        :return: list of dicts with ``input_id``, ``input_symbol``,
            ``hit_symbol``, ``hit_id`` and a constant ``score`` of 0.
        """
        copied_keys = ('input_id', 'input_symbol', 'hit_symbol', 'hit_id')
        hits = []
        for entry in annotated_gene_set:
            curie = entry['sim_input_curie']
            response = self.blw.gene_interactions(gene_curie=curie)
            for association in response['associations']:
                parsed = self.blw.parse_association(
                    input_id=curie,
                    input_label=entry['input_symbol'],
                    association=association,
                )
                row = {key: parsed[key] for key in copied_keys}
                row['score'] = 0
                hits.append(row)
        return hits
class LookUp(object):
    """Mod O: look up the genes associated with a previously loaded disease.

    Usage is two-step: ``load_input_object`` resolves and stores the disease,
    then ``disease_geneset_lookup`` returns its gene associations as a pandas
    DataFrame.
    """

    # Columns that identify one association; rows agreeing on all of them are
    # merged and their ``sources`` strings concatenated.
    _GROUP_COLUMNS = ['input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation']

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.mg = MyGeneInfo()
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }
        # Constructing the module announces its metadata on stdout.
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    def load_input_object(self, input_object):
        """Resolve the disease named by ``input_object['input']`` and store it.

        The object returned by ``self.blw.get_obj`` is reduced to its ``id``,
        ``label`` and ``description`` and pretty-printed.

        :param input_object: mapping with an ``input`` key holding the
            disease identifier.
        """
        resolved = self.blw.get_obj(obj_id=input_object['input'])
        self.input_object = {
            'id': resolved['id'],
            'label': resolved['label'],
            'description': resolved['description'],
        }
        pprint(self.input_object)

    def disease_geneset_lookup(self):
        """Return the gene associations of the loaded disease as a DataFrame.

        Requires ``load_input_object`` to have been called first.

        :return: DataFrame with the columns in ``_GROUP_COLUMNS`` plus
            ``sources``; empty (with those columns) when the disease has no
            associations.
        """
        disease_id = self.input_object['id']
        disease_label = self.input_object['label']
        response = self.blw.disease2genes(disease_id)
        gene_records = [
            self.blw.parse_association(disease_id, disease_label, association)
            for association in response['associations']
        ]
        # for input_gene in input_gene_set:
        #     igene_mg = self.mg.query(input_gene['hit_id'].replace('HGNC', 'hgnc'), species='human', entrezonly=True,
        #                              fields='entrez,HGNC,symbol')
        #     input_gene.update({'input_ncbi': 'NCBIGene:{}'.format(igene_mg['hits'][0]['_id'])})

        # Without this guard the frame built from an empty list has no
        # 'sources' column and the code below fails with KeyError.
        if not gene_records:
            return pd.DataFrame(columns=self._GROUP_COLUMNS + ['sources'])

        genes_df = pd.DataFrame(data=gene_records)
        # Group duplicate ids and gather sources: first flatten each row's
        # list of sources, then join the strings of rows in the same group.
        genes_df['sources'] = genes_df['sources'].str.join(', ')
        genes_df = (
            genes_df.groupby(self._GROUP_COLUMNS)['sources']
            .apply(', '.join)
            .reset_index()
        )
        return genes_df
class GeneInteractions(object):
    """Collect interaction partners for a gene set held on the instance.

    Usage is three-step: ``load_input_object`` stores the input,
    ``load_gene_set`` derives ``self.gene_set`` from it, and
    ``get_interactions`` queries ``self.blw`` for every gene in that set.
    """

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.gene_set = []
        self.input_object = ''
        # Input and output share one description; each gets its own dict.
        hgnc_gene_set = (('complexity', 'set'), ('id_type', 'HGNC'), ('data_type', 'gene'))
        self.meta = dict(
            input_type=dict(hgnc_gene_set),
            output_type=dict(hgnc_gene_set),
            source='Monarch Biolink',
            predicate=['blm:interacts with'],
        )

    def load_input_object(self, input_object):
        """Store ``input_object`` unchanged for later use by ``load_gene_set``."""
        self.input_object = input_object

    def load_gene_set(self):
        """Append one annotated dict per record in ``self.input_object['input']``.

        Entries are appended to ``self.gene_set``; the list is not cleared
        first, so repeated calls accumulate.
        """
        for record in self.input_object['input']:
            annotated = {
                'input_id': record['hit_id'],
                'sim_input_curie': record['hit_id'],
                'input_symbol': record['hit_symbol'],
            }
            self.gene_set.append(annotated)

    def get_interactions(self):
        """Return one dict per interaction of every gene in ``self.gene_set``.

        :return: list of dicts with ``input_id``, ``input_symbol``,
            ``hit_symbol``, ``hit_id`` and a constant ``score`` of 0.
        """
        copied_keys = ('input_id', 'input_symbol', 'hit_symbol', 'hit_id')
        hits = []
        for entry in self.gene_set:
            curie = entry['sim_input_curie']
            response = self.blw.gene_interactions(gene_curie=curie)
            for association in response['associations']:
                parsed = self.blw.parse_association(
                    input_id=curie,
                    input_label=entry['input_symbol'],
                    association=association,
                )
                row = {key: parsed[key] for key in copied_keys}
                row['score'] = 0
                hits.append(row)
        return hits
def __init__(self):
    """Create the Biolink wrapper and the module's metadata description.

    The wrapper is built from the endpoint reported by
    ``Config().getBiolinkApiEndpoint()``. ``meta`` declares HGNC gene sets as
    both input and output.
    """
    endpoint = Config().getBiolinkApiEndpoint()
    self.blw = BioLinkWrapper(endpoint)
    self.meta = dict(
        input_type=dict(complexity='set', id_type='HGNC', data_type='gene'),
        output_type=dict(complexity='set', id_type='HGNC', data_type='gene'),
        source='Monarch Biolink',
        predicate=['blm:interacts with'],
    )
def __init__(self):
    """Set up the wrapper, empty working state and the metadata description.

    ``gene_set`` starts as an empty list and ``input_object`` as an empty
    string. ``meta`` declares HGNC gene sets as both input and output.
    """
    self.blw = BioLinkWrapper()
    self.gene_set = list()
    self.input_object = ''

    def hgnc_gene_set():
        # A fresh dict per call so input and output never share one object.
        return {'complexity': 'set', 'id_type': 'HGNC', 'data_type': 'gene'}

    self.meta = {
        'input_type': hgnc_gene_set(),
        'output_type': hgnc_gene_set(),
        'source': 'Monarch Biolink',
        'predicate': ['blm:interacts with'],
    }
def __init__(self):
    """Create the Biolink wrapper, the gene client and the metadata description.

    The wrapper is built from ``Config().get_biolink_api_endpoint()`` and the
    gene client comes from ``get_client('gene')``. ``meta`` declares a single
    disease identifier as input and a set of HGNC identifiers as output.
    """
    endpoint = Config().get_biolink_api_endpoint()
    self.blw = BioLinkWrapper(endpoint)
    self.mg = get_client('gene')
    self.input_object = ''
    self.meta = dict(
        data_type='disease',
        input_type=dict(complexity='single', id_type=['MONDO', 'DO', 'OMIM']),
        output_type=dict(complexity='set', id_type='HGNC'),
        taxon='human',
        limit=None,
        source='Monarch Biolink',
        predicate='blm:gene associated with condition',
    )
def __init__(self):
    """Create the wrapper, the ``MyGeneInfo`` client and the metadata description.

    ``input_object`` starts as an empty string. ``meta`` declares a single
    disease identifier as input and a set of HGNC identifiers as output.
    """
    self.blw = BioLinkWrapper()
    self.mg = MyGeneInfo()
    self.input_object = ''

    accepted_prefixes = ['MONDO', 'DO', 'OMIM']
    description = {'data_type': 'disease'}
    description['input_type'] = {'complexity': 'single', 'id_type': accepted_prefixes}
    description['output_type'] = {'complexity': 'set', 'id_type': 'HGNC'}
    description['taxon'] = 'human'
    description['limit'] = None
    description['source'] = 'Monarch Biolink'
    description['predicate'] = 'blm:gene associated with condition'
    self.meta = description
def __init__(self):
    """Set up the wrapper, empty working state and metadata, then print it.

    ``meta`` declares HGNC gene sets as both input and output with the
    ``blm:similar_to`` predicate. Construction writes a banner and the
    pretty-printed metadata to stdout.
    """
    self.blw = BioLinkWrapper()
    self.gene_set = list()
    self.input_object = ''
    # Input and output share one description; each gets its own dict.
    hgnc_gene_set = (('complexity', 'set'), ('id_type', 'HGNC'), ('data_type', 'gene'))
    self.meta = dict(
        input_type=dict(hgnc_gene_set),
        output_type=dict(hgnc_gene_set),
        source='Monarch Biolink',
        predicate=['blm:similar_to'],
    )
    print('Mod1B OwlSim Phenotype Similarity metadata:')
    pprint(self.meta)
class OwlsimSimilarity(object):
    """Mod1B: find genes similar to each gene of an input set via OwlSim.

    Usage is three-step: ``load_input_object`` stores the input,
    ``load_gene_set`` derives ``self.gene_set`` from it, and
    ``calculate_similarity`` queries ``self.blw.compute_owlsim`` per gene.
    """

    def __init__(self):
        self.blw = BioLinkWrapper()
        self.gene_set = []
        self.input_object = ''
        self.meta = {
            'input_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'source': 'Monarch Biolink',
            'predicate': ['blm:similar_to']
        }
        # Constructing the module announces its metadata on stdout.
        print("""Mod1B OwlSim Phenotype Similarity metadata:""")
        pprint(self.meta)

    def load_input_object(self, input_object):
        """Store ``input_object`` unchanged for later use by ``load_gene_set``."""
        self.input_object = input_object

    def load_gene_set(self):
        """Append one annotated dict per record in ``self.input_object['input']``.

        Entries are appended to ``self.gene_set``; the list is not cleared
        first, so repeated calls accumulate.
        """
        for gene in self.input_object['input']:
            self.gene_set.append({
                'input_id': gene['hit_id'],
                'sim_input_curie': gene['hit_id'],
                'input_symbol': gene['hit_symbol'],
            })

    def calculate_similarity(self):
        """Return the gene-type OwlSim matches for every gene in ``self.gene_set``.

        A failure while processing one gene is reported on stdout and does not
        stop the remaining genes (best effort); matches collected for that
        gene before the failure are kept.

        :return: list of dicts with ``input_id``, ``input_symbol``,
            ``hit_symbol``, ``hit_id`` and ``score``.
        """
        results = []
        for gene in self.gene_set:
            try:
                owlsim = self.blw.compute_owlsim(gene['sim_input_curie'])
                for match in owlsim['matches']:
                    if match['type'] == 'gene':
                        results.append({
                            'input_id': gene['input_id'],
                            'input_symbol': gene['input_symbol'],
                            'hit_symbol': match['label'],
                            'hit_id': match['id'],
                            'score': match['score'],
                        })
            except Exception as e:
                # Report the gene being processed. The previous handler read
                # the loop variable ``match``, which is unbound when the
                # OwlSim call itself fails and made the handler raise.
                print(gene.get('sim_input_curie'), e)
        return results
class GeneInteractions(object):
    """Mod1E: find genes that interact with several genes of an input set.

    Interactions are fetched per gene through ``self.blw`` (a
    ``BioLinkWrapper``) and filtered by how often each partner occurs.
    """

    # Column layout of the frame returned by get_interactions.
    _RESULT_COLUMNS = ['input_id', 'input_symbol', 'hit_symbol', 'hit_id', 'score']

    def __init__(self):
        self.blw = BioLinkWrapper(Config().get_biolink_api_endpoint())
        self.meta = {
            'input_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC',
                'data_type': 'gene',
            },
            'source': 'Monarch Biolink',
            'predicate': ['blm:interacts with']
        }

    def metadata(self):
        """Print a banner line followed by the pretty-printed ``meta`` dict."""
        print("""Mod1E Interaction Network metadata:""")
        pprint(self.meta)

    @staticmethod
    def load_gene_set(gene_records):
        """Turn the rows of ``gene_records`` into annotated gene dicts.

        :param gene_records: pandas DataFrame (RMB: July 5, 2019) with
            ``hit_id`` and ``hit_symbol`` columns.
        :return: list of dicts with ``input_id``, ``sim_input_curie`` (both
            the row's ``hit_id``) and ``input_symbol``.
        """
        return [
            {
                'input_id': gene['hit_id'],
                'sim_input_curie': gene['hit_id'],
                'input_symbol': gene['hit_symbol'],
            }
            for gene in gene_records.to_dict(orient='records')
        ]

    def get_interactions(self, input_gene_set, threshold):
        """Return interactions whose partner occurs more than ``threshold`` times.

        :param input_gene_set: pandas DataFrame accepted by ``load_gene_set``.
        :param threshold: value convertible with ``int()``; a partner is kept
            only when its ``hit_symbol`` appears strictly more often than this
            across all collected interactions.
        :return: DataFrame with the columns in ``_RESULT_COLUMNS``; rows keep
            their position index from the unfiltered result. Empty (with those
            columns) when no interactions were found.
        """
        annotated_input_gene_set = self.load_gene_set(input_gene_set)
        lower_bound = int(threshold)

        rows = []
        for gene in annotated_input_gene_set:
            interactions = self.blw.gene_interactions(gene_curie=gene['sim_input_curie'])
            for assoc in interactions['associations']:
                interaction = self.blw.parse_association(
                    input_id=gene['sim_input_curie'],
                    input_label=gene['input_symbol'],
                    association=assoc,
                )
                rows.append({
                    'input_id': interaction['input_id'],
                    'input_symbol': interaction['input_symbol'],
                    'hit_symbol': interaction['hit_symbol'],
                    'hit_id': interaction['hit_id'],
                    'score': 0,
                })

        # A frame built from no rows has no 'hit_symbol' column, so the
        # filtering below would fail with KeyError.
        if not rows:
            return pd.DataFrame(columns=self._RESULT_COLUMNS)

        results = pd.DataFrame.from_records(rows)
        counts = results['hit_symbol'].value_counts()
        frequent_symbols = counts[counts > lower_bound].index
        return results[results['hit_symbol'].isin(frequent_symbols)].copy()