def test_mouse_map():
    """Check the MGI -> HGNC ID lookup for known and unknown mouse IDs."""
    # The bare number and the 'MGI:'-prefixed form must resolve identically.
    for mouse_id in ('109599', 'MGI:109599'):
        assert hgnc_client.get_hgnc_from_mouse(mouse_id) == '4820'
    # An ID with no mapping yields None rather than raising.
    assert hgnc_client.get_hgnc_from_mouse('xxx') is None
def mgi_to_hgnc_name(gene_list):
    """Convert given mouse gene symbols to HGNC equivalent symbols.

    Parameters
    ----------
    gene_list : iterable of str
        Mouse gene symbols. Symbols that are not keys of
        ``mouse_gene_name_to_mgi`` are ignored.

    Returns
    -------
    set of str
        HGNC symbols of the human counterparts. Mouse genes for which no
        HGNC ID or no HGNC symbol can be found are left out, so the set
        never contains None.
    """
    filtered_mgi = {
        mouse_gene_name_to_mgi[gene]
        for gene in gene_list
        if gene in mouse_gene_name_to_mgi
    }
    hgnc_gene_set = set()
    for mgi_id in filtered_mgi:
        hgnc_id = get_hgnc_from_mouse(mgi_id)
        # The lookup may come back empty for mouse genes without a human
        # counterpart; skip those instead of looking up a name for None.
        if hgnc_id is None:
            continue
        hgnc_name = get_hgnc_name(hgnc_id)
        if hgnc_name is not None:
            hgnc_gene_set.add(hgnc_name)
    return hgnc_gene_set
def _refs_from_mgi_id(mgi_id):
    """Return a reference dict for an MGI ID plus its HGNC-derived references.

    The MGI ID is mapped to an HGNC ID, and the references produced by
    ``_refs_from_hgnc_id`` for that HGNC ID are merged on top of
    ``{'MGI': mgi_id}``. Returns None if the MGI ID has no HGNC mapping
    (a warning is logged) or if no references are available for the
    HGNC ID.
    """
    human_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
    if human_id is None:
        logger.warning('Could not get HGNC ID for MGI ID %s' % mgi_id)
        return None

    human_refs = _refs_from_hgnc_id(human_id)
    if human_refs is None:
        return None

    # Start from the mouse reference; HGNC-derived entries take precedence
    # on any key collision.
    refs = {'MGI': mgi_id}
    refs.update(human_refs)
    return refs
def ligand_mgi_to_hgnc_name(seurat_ligand_genes):
    """Convert mouse ligand gene symbols to HGNC symbols, grouped by logFC.

    Parameters
    ----------
    seurat_ligand_genes : dict
        Mapping whose keys are logFC values and whose values are mouse gene
        symbols (it is iterated as ``for logfc, gene in ....items()``).
        Genes that are not keys of ``mouse_gene_name_to_mgi`` are ignored.

    Returns
    -------
    collections.defaultdict
        Maps a logFC value to the set of HGNC symbols found for it. Each
        HGNC symbol is reported only once overall, under the logFC of the
        first entry that produced it. Mouse genes with no HGNC ID or no
        HGNC symbol are left out, so no set contains None.
    """
    hgnc_gene_dict = defaultdict(set)
    seen_genes = set()
    for logfc, gene in seurat_ligand_genes.items():
        if gene not in mouse_gene_name_to_mgi:
            continue
        hgnc_id = get_hgnc_from_mouse(mouse_gene_name_to_mgi[gene])
        # The lookup may come back empty for mouse genes without a human
        # counterpart; skip those instead of looking up a name for None.
        if hgnc_id is None:
            continue
        hgnc_symbol = get_hgnc_name(hgnc_id)
        if hgnc_symbol is None or hgnc_symbol in seen_genes:
            continue
        seen_genes.add(hgnc_symbol)
        hgnc_gene_dict[logfc].add(hgnc_symbol)
    return hgnc_gene_dict
def mgi_to_hgnc_name(gene_list):
    """Look up an HGNC symbol for the given mouse gene symbols.

    Mouse gene symbols are first mapped to MGI IDs via the ``um`` tables
    (``uniprot_gene_name`` and ``uniprot_mgi``), then each MGI ID is
    mapped to an HGNC ID and on to an HGNC symbol.

    Returns the string 'None' (not the None object) when none of the given
    symbols has an MGI ID; otherwise returns a single HGNC symbol.

    NOTE(review): only one symbol is returned even if several MGI IDs
    match -- the one belonging to the last ID iterated, and set iteration
    order is arbitrary. Confirm that callers only pass inputs that resolve
    to a single gene.
    """
    # Gene symbol -> MGI ID, restricted to entries that have an MGI ID.
    symbol_to_mgi = {}
    for entry_id, symbol in um.uniprot_gene_name.items():
        if entry_id in um.uniprot_mgi:
            symbol_to_mgi[symbol] = um.uniprot_mgi.get(entry_id)

    mgi_ids = {
        symbol_to_mgi[gene] for gene in gene_list if gene in symbol_to_mgi
    }
    if not mgi_ids:
        return 'None'

    for mgi_id in mgi_ids:
        hgnc_name = get_hgnc_name(get_hgnc_from_mouse(mgi_id))
    return hgnc_name
def load_mouse_genes(fname):
    """Return a list of human HGNC IDs based on a table of mouse genes.

    Parameters
    ----------
    fname : str
        Path to a CSV file with a header row. The first column whose header
        starts with 'MGI' is taken to hold the MGI IDs, with or without the
        'MGI:' prefix.

    Returns
    -------
    list
        HGNC IDs of the human genes corresponding to the mouse genes, in
        table order. Rows with a blank MGI cell are skipped; rows whose
        MGI ID has no human counterpart are reported on stdout and skipped.

    Raises
    ------
    KeyError
        If no column header starts with 'MGI'.
    """
    # Assumes the csv has headers
    df = pandas.read_csv(fname)
    # Assumes the first column starting with MGI is the relevant one
    # with MGI:IDs
    mgi_col = next((c for c in df.columns if c.startswith('MGI')), None)
    if mgi_col is None:
        raise KeyError("No column starting with 'MGI' in %s" % fname)

    genes = []
    # Blank cells are read as NaN floats, which have no startswith();
    # drop them up front instead of crashing part-way through the table.
    for mgi_id in df[mgi_col].dropna():
        if mgi_id.startswith('MGI:'):
            mgi_id = mgi_id[4:]
        hgnc_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
        if not hgnc_id:
            print('Could not find human gene corresponding to MGI %s'
                  % mgi_id)
            continue
        genes.append(hgnc_id)
    return genes
def mouse_human_mappings(df):
    """Map mouse motif peptides to human UniProt proteins.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an 'MgiId' column (one or more 'MGI:<number>' IDs joined
        by '|') and a 'MotifPeptide' column (peptide string in which '-'
        marks gaps and '*' directly follows the target residue).

    Returns
    -------
    list of tuple
        One ``(uniprot_id, peptide, site_pos)`` tuple per human protein
        found for a row, where ``peptide`` has gaps and the asterisk
        removed and ``site_pos`` is the 1-based position of the target
        residue within it. Rows without an asterisk or without an MGI ID
        contribute nothing.

    Raises
    ------
    IndexError, ValueError
        If an MGI ID entry has no ':' or its part after ':' is not an
        integer.
    """
    human_peptides = []
    for mgi_id_str, peptide in df[['MgiId', 'MotifPeptide']].values:
        # Remove --- indicating gaps (start/end of protein)
        ungapped = peptide.replace('-', '')
        star_pos = ungapped.find('*')
        # Without an asterisk the target residue is unknown; skip the row
        if star_pos == -1:
            continue
        # Remove the star from the peptide
        proc_peptide = ungapped.replace('*', '')
        # The star sits right after the target residue, so its 0-based
        # index equals the residue's 1-based position (star_pos - 1 + 1)
        site_pos = star_pos
        # Skip peptides with no MGI ID. A missing value is a NaN float;
        # test the type rather than identity with np.nan, which only
        # matches that one singleton object.
        if not isinstance(mgi_id_str, str):
            continue
        # Get Uniprot ID(s) for this gene(s)
        human_proteins = set()
        for mgi_id in mgi_id_str.split('|'):
            mgi_id = mgi_id.split(':')[1]
            # Sanity check: raises ValueError on a malformed ID
            int(mgi_id)
            hgnc_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
            if hgnc_id is None:
                continue
            up_id_hgnc = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id_hgnc is None:
                continue
            # If there is more than one hgnc->up_id, keep all of them
            human_proteins.update(
                up_id.strip() for up_id in up_id_hgnc.split(',')
            )
        if len(human_proteins) > 1:
            print("Warning: >1 protein: %s, %s" %
                  (mgi_id_str, str(human_proteins)))
        human_peptides.extend(
            (human_prot, proc_peptide, site_pos)
            for human_prot in human_proteins
        )
    return human_peptides