Ejemplo n.º 1
0
def test_mouse_map():
    """Check the mouse-to-human ID lookup for bare and prefixed MGI IDs.

    The same mouse gene ID, given with and without the 'MGI:' prefix, is
    expected to resolve to HGNC ID '4820'; an unrecognized ID is expected
    to come back as None.
    """
    # Run both lookups up front, then compare each against the expected ID.
    bare_result, prefixed_result = [
        hgnc_client.get_hgnc_from_mouse(mouse_id)
        for mouse_id in ('109599', 'MGI:109599')
    ]
    assert bare_result == '4820'
    assert prefixed_result == '4820'
    # An ID that cannot be matched is expected to give None.
    assert hgnc_client.get_hgnc_from_mouse('xxx') is None
def mgi_to_hgnc_name(gene_list):
    """Convert given mouse gene symbols to HGNC equivalent symbols.

    Parameters
    ----------
    gene_list : iterable
        Mouse gene symbols. Symbols that are not keys of
        ``mouse_gene_name_to_mgi`` are ignored.

    Returns
    -------
    set
        HGNC symbols of the genes that could be mapped. Genes for which no
        HGNC ID or no HGNC symbol is found are left out, so the set never
        contains None.
    """
    # Collect MGI IDs in a set so that symbols sharing an MGI ID are only
    # looked up once.
    filtered_mgi = {
        mouse_gene_name_to_mgi[gene]
        for gene in gene_list if gene in mouse_gene_name_to_mgi
    }
    hgnc_gene_set = set()
    for mgi_id in filtered_mgi:
        hgnc_id = get_hgnc_from_mouse(mgi_id)
        if hgnc_id is None:
            # No human counterpart found; do not pass None on to the
            # name lookup.
            continue
        hgnc_name = get_hgnc_name(hgnc_id)
        if hgnc_name is not None:
            hgnc_gene_set.add(hgnc_name)
    return hgnc_gene_set
Ejemplo n.º 3
0
def _refs_from_mgi_id(mgi_id):
    """Build a refs dict for a mouse gene, extended with its human refs.

    Parameters
    ----------
    mgi_id :
        MGI ID of the mouse gene, passed to
        ``hgnc_client.get_hgnc_from_mouse``.

    Returns
    -------
    dict or None
        A dict holding ``mgi_id`` under the key 'MGI', updated with
        whatever ``_refs_from_hgnc_id`` returns for the matching HGNC ID.
        None if no HGNC ID is found (a warning is logged in that case) or
        if ``_refs_from_hgnc_id`` returns None.
    """
    human_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
    if human_id is None:
        logger.warning('Could not get HGNC ID for MGI ID %s' %
                       mgi_id)
        return None

    human_refs = _refs_from_hgnc_id(human_id)
    if human_refs is not None:
        # Start from the mouse ID and layer the human references on top.
        refs = {'MGI': mgi_id}
        refs.update(human_refs)
        return refs
    return None
def ligand_mgi_to_hgnc_name(seurat_ligand_genes):
    """Map mouse ligand genes to HGNC symbols, grouped by input key.

    Parameters
    ----------
    seurat_ligand_genes : dict
        Mapping whose values are mouse gene symbols. The keys are only used
        for grouping the output (they are named ``logfc`` below, presumably
        log fold change values -- confirm against the caller).

    Returns
    -------
    collections.defaultdict of set
        Maps keys of the input to sets of HGNC symbols. Each HGNC symbol is
        reported once, under the first key it is encountered with. Genes
        that are not in ``mouse_gene_name_to_mgi``, or for which no HGNC ID
        or HGNC symbol is found, are left out.
    """
    hgnc_gene_dict = defaultdict(set)
    seen_genes = set()
    for logfc, gene in seurat_ligand_genes.items():
        if gene not in mouse_gene_name_to_mgi:
            continue
        hgnc_id = get_hgnc_from_mouse(mouse_gene_name_to_mgi[gene])
        if hgnc_id is None:
            # No human counterpart found; do not pass None on to the
            # name lookup.
            continue
        hgnc_symbol = get_hgnc_name(hgnc_id)
        if hgnc_symbol is None or hgnc_symbol in seen_genes:
            continue
        seen_genes.add(hgnc_symbol)
        hgnc_gene_dict[logfc].add(hgnc_symbol)
    return hgnc_gene_dict
Ejemplo n.º 5
0
def mgi_to_hgnc_name(gene_list):
    """Convert given mouse gene symbols to an HGNC equivalent symbol.

    Parameters
    ----------
    gene_list : iterable
        Mouse gene symbols. Symbols without an MGI ID in the UniProt
        mappings (``um.uniprot_gene_name`` / ``um.uniprot_mgi``) are
        ignored.

    Returns
    -------
    str
        The string 'None' (not the None object) if none of the given
        symbols has an MGI ID. Otherwise the result of ``get_hgnc_name``
        for a single MGI ID.

    Notes
    -----
    NOTE(review): when several of the given symbols have MGI IDs, only the
    name looked up last is returned, and the IDs are visited in set
    iteration order. This looks intended for single-gene lists -- confirm
    against the callers before relying on it for longer lists.
    """
    # Gene symbol -> MGI ID, for every UniProt entry that has an MGI ID.
    mouse_gene_name_to_mgi = {
        gene_name: um.uniprot_mgi.get(up_id)
        for up_id, gene_name in um.uniprot_gene_name.items()
        if up_id in um.uniprot_mgi
    }

    filtered_mgi = {
        mouse_gene_name_to_mgi[gene]
        for gene in gene_list if gene in mouse_gene_name_to_mgi
    }
    if not filtered_mgi:
        return 'None'

    for mgi_id in filtered_mgi:
        hgnc_id = get_hgnc_from_mouse(mgi_id)
        hgnc_name = get_hgnc_name(hgnc_id)
    return hgnc_name
Ejemplo n.º 6
0
def load_mouse_genes(fname):
    """Return a list of human HGNC IDs based on a table of mouse genes.

    Parameters
    ----------
    fname :
        CSV file with a header row, passed as is to ``pandas.read_csv``.
        The first column whose name starts with 'MGI' is taken to hold the
        MGI IDs, given with or without the 'MGI:' prefix.

    Returns
    -------
    list
        The HGNC IDs returned by ``hgnc_client.get_hgnc_from_mouse``, in
        row order. Rows with an empty MGI cell are skipped, and rows for
        which no HGNC ID is found are skipped after printing a message.

    Raises
    ------
    KeyError
        If no column name starts with 'MGI'.
    """
    # assumes the csv has headers
    df = pandas.read_csv(fname)
    # assumes the first column starting with MGI is the relevant one
    # with MGI:IDs
    mgi_column = next((c for c in df.columns if c.startswith('MGI')), None)
    if mgi_column is None:
        raise KeyError("No column starting with 'MGI' found")

    genes = []
    # Empty cells are read as NaN, which has no startswith(); drop them
    # instead of failing on the first incomplete row.
    for mgi_id in df[mgi_column].dropna():
        if mgi_id.startswith('MGI:'):
            mgi_id = mgi_id[4:]
        hgnc_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
        if not hgnc_id:
            print('Could not find human gene corresponding to MGI %s' % mgi_id)
            continue
        genes.append(hgnc_id)
    return genes
Ejemplo n.º 7
0
def mouse_human_mappings(df):
    """Map mouse site peptides to the UniProt IDs of human counterparts.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'MgiId' and 'MotifPeptide'. 'MgiId' holds
        one or more '|'-separated IDs of the form '<prefix>:<number>';
        'MotifPeptide' holds a peptide in which '-' marks gaps and '*'
        marks the target residue.

    Returns
    -------
    list of tuple
        One ``(uniprot_id, peptide, site_pos)`` tuple per human protein
        found for each usable row, where ``peptide`` has gaps and the star
        removed. Rows without a star or without an MGI ID are skipped.

    Raises
    ------
    IndexError
        If an MGI ID contains no ':'.
    ValueError
        If the part after the ':' is not an integer.
    """
    site_data = df[['MgiId', 'MotifPeptide']].values
    human_peptides = []
    for mgi_id_str, peptide in site_data:
        # Remove --- indicating gaps (start/end of protein)
        remove_gap = peptide.replace('-', '')
        star_pos = remove_gap.find('*')
        # If there's no asterisk (think this happens once in whole dataset)
        # skip this peptide
        if star_pos == -1:
            continue
        # Skip peptides with no MGI ID. Missing values show up as NaN
        # floats, and not every NaN is the np.nan object itself, so test
        # for a usable string rather than for identity with np.nan.
        if not isinstance(mgi_id_str, str):
            continue
        # Remove the star from the peptide
        proc_peptide = remove_gap.replace('*', '')
        # Get the position of the target residue (star_pos - 1 + 1): the
        # 0-based index of the star is used as the residue's position.
        site_pos = star_pos
        # Get Uniprot ID(s) for this gene(s)
        human_proteins = set()
        for mgi_id in mgi_id_str.split('|'):
            mgi_id = mgi_id.split(':')[1]
            # Sanity check only: raises ValueError for a non-numeric ID.
            int(mgi_id)
            hgnc_id = hgnc_client.get_hgnc_from_mouse(mgi_id)
            if hgnc_id is None:
                continue
            up_id_hgnc = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id_hgnc is None:
                continue
            # If there is more than one hgnc->up_id, try both
            human_proteins.update(
                up_id.strip() for up_id in up_id_hgnc.split(',')
            )
        if len(human_proteins) > 1:
            print("Warning: >1 protein: %s, %s" %
                  (mgi_id_str, str(human_proteins)))
        human_peptides.extend(
            (human_prot, proc_peptide, site_pos)
            for human_prot in human_proteins
        )
    return human_peptides