def get_mgi_to_ensembl_map():
    """
    Query the mouse gene dataset for MGI ids paired with ensembl gene ids.

    @return: A dict built from the rows of the query (presumably keyed by
        MGI id with the ensembl gene id as value - the row layout is
        decided by quick_query).
    """
    mgi_ensembl_rows = biomart.quick_query(
        dataset='mmusculus_gene_ensembl',
        attributes=['mgi_id', 'ensembl_gene_id'],
    )
    return dict(mgi_ensembl_rows)
def get_all_ensembl_names():
    """
    Ask the Ensembl biomart for the name of every mouse gene.

    @return: A dict built from the rows of the query, which requests the
        'ensembl_gene_id' and 'external_gene_id' attributes.
    """
    from biopsy.identifiers.biomart import quick_query as run_query

    logging.info("Querying Ensembl biomart for all mouse genes' names")
    name_rows = run_query(
        dataset='mmusculus_gene_ensembl',
        attributes=('ensembl_gene_id', 'external_gene_id'),
    )
    return dict(name_rows)
def genes_for_go_id(go_id):
    """
    Yield the first column of every row returned when the mouse gene
    dataset is filtered on the given GO id.

    @param go_id: Value passed as the 'go' filter of the query.
    @return: A generator over row[0] of each result row (the only
        attribute requested is 'ensembl_gene_id').
    """
    import biopsy.identifiers.biomart as biomart

    matching_rows = biomart.quick_query(
        dataset='mmusculus_gene_ensembl',
        attributes=['ensembl_gene_id'],
        filters=[('go', go_id)],
    )
    for gene_row in matching_rows:
        yield gene_row[0]
def entrez_to_ensembl():
    """
    Query the mouse gene dataset for entrez gene ids paired with ensembl
    gene ids, with no filters applied.

    @return: A dict built from the rows of the query.
    """
    import biopsy.identifiers.biomart as biomart

    entrez_rows = biomart.quick_query(
        dataset='mmusculus_gene_ensembl',
        attributes=['entrezgene', 'ensembl_gene_id'],
        filters=(),
    )
    return dict(entrez_rows)
def transcripts_to_genes():
    """
    Query the mouse gene dataset for ensembl transcript ids paired with
    ensembl gene ids, with no filters applied.

    @return: A dict built from the rows of the query.
    """
    import biopsy.identifiers.biomart as biomart

    transcript_rows = biomart.quick_query(
        dataset='mmusculus_gene_ensembl',
        attributes=['ensembl_transcript_id', 'ensembl_gene_id'],
        filters=(),
    )
    return dict(transcript_rows)
def get_all_ensembl_names():
    """
    Retrieve every mouse gene name known to the Ensembl biomart.

    @return: A dict built from the rows of a query for the
        'ensembl_gene_id' and 'external_gene_id' attributes.
    """
    from biopsy.identifiers.biomart import quick_query as fetch_rows

    logging.info("Querying Ensembl biomart for all mouse genes' names")
    wanted_attributes = ('ensembl_gene_id', 'external_gene_id')
    gene_name_rows = fetch_rows(
        dataset='mmusculus_gene_ensembl',
        attributes=wanted_attributes,
    )
    return dict(gene_name_rows)
def go_ids_for_genes(genes):
    """
    Yield the rows returned when the mouse gene dataset is filtered on the
    given ensembl gene ids.

    @param genes: Iterable of strings; they are joined with commas to form
        the value of the 'ensembl_gene_id' filter.
    @return: A generator over the result rows, unmodified.
    """
    import biopsy.identifiers.biomart as biomart

    # Only the biological process ids are requested; the
    # 'go_cellular_component_id' and 'go_molecular_function_id' attributes
    # are not part of this query.
    requested_attributes = [
        'ensembl_gene_id',
        'go_biological_process_id',
    ]
    gene_filter = ('ensembl_gene_id', ','.join(genes))
    annotation_rows = biomart.quick_query(
        dataset='mmusculus_gene_ensembl',
        attributes=requested_attributes,
        filters=[gene_filter],
    )
    for annotation_row in annotation_rows:
        yield annotation_row
def get_all_ensembl_go_annotations():
    """
    Collect GO annotations for all mouse genes from the Ensembl biomart.

    One query is made per GO namespace (biological process, cellular
    component, molecular function). A row is kept only when its evidence
    code is not in options.go_evidence_codes_to_ignore and its GO id is
    non-empty.

    @return: A cookbook.DictOfLists in which the GO ids of the kept rows
        are appended under the row's first column (the ensembl gene id).
    """
    import biopsy.identifiers.biomart as mart

    logging.info('Querying Ensembl biomart for all GO annotations')
    annotations = cookbook.DictOfLists()

    # (GO id attribute, evidence code attribute) for each namespace.
    namespace_attributes = [
        ('go_biological_process_id', 'go_biological_process_linkage_type'),
        ('go_cellular_component_id', 'go_cellular_component_linkage_type'),
        ('go_molecular_function_id', 'go_molecular_function_linkage_type'),
    ]
    for go_attr, linkage_attr in namespace_attributes:
        namespace_rows = mart.quick_query(
            dataset='mmusculus_gene_ensembl',
            attributes=['ensembl_gene_id', go_attr, linkage_attr]
        )
        for record in namespace_rows:
            # Skip rows with an ignored evidence code or without a GO id.
            if record[2] in options.go_evidence_codes_to_ignore or not record[1]:
                continue
            annotations[record[0]].append(record[1])

    total_found = sum(len(go_ids) for go_ids in annotations.values())
    logging.info('Found %d go annotations', total_found)
    return annotations
def transcripts_to_genes():
    """
    Look up, for the mouse gene dataset, each ensembl transcript id
    together with its ensembl gene id. No filters are applied.

    @return: A dict built from the rows of the query.
    """
    import biopsy.identifiers.biomart as biomart

    query_arguments = dict(
        dataset='mmusculus_gene_ensembl',
        attributes=['ensembl_transcript_id', 'ensembl_gene_id'],
        filters=(),
    )
    return dict(biomart.quick_query(**query_arguments))
def get_rat_mouse_orthologs():
    """
    Query the rat gene dataset for each rat ensembl gene id together with
    its 'mouse_ensembl_gene' attribute.

    @return: A dict built from the rows of the query; its size is logged
        as the number of rat genes mapped to mouse.
    """
    from biopsy.identifiers.biomart import quick_query as run_query

    logging.info('Getting rat mouse orthologs from Ensembl')
    ortholog_rows = run_query(
        dataset='rnorvegicus_gene_ensembl',
        attributes=['ensembl_gene_id', 'mouse_ensembl_gene'],
    )
    rat_to_mouse = dict(ortholog_rows)
    logging.info('Mapped %d rat genes to mouse', len(rat_to_mouse))
    return rat_to_mouse