Beispiel #1
0
def get_phospho_antibody_map(fname=antibody_map_file):
    # First gather the annotations for the phosphosites
    df = pandas.read_csv(fname, index_col=None, sep=',', encoding='utf8')
    antibody_map = {}

    for _, row in df.iterrows():
        ps = row['phosphosite']
        sub_upid = row['SUB_ID']
        if not pandas.isnull(sub_upid):
            if sub_upid.find('-') != -1:
                sub_upid = sub_upid.split('-')[0]
            sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
        else:
            sub_hgnc_symbol = row['SUB_GENE']
            sub_hgnc_id = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
            sub_upid = hgnc_client.get_uniprot_id(sub_hgnc_id)
            if sub_upid is None:
                continue
        sub = Agent(sub_hgnc_symbol,
                    db_refs={
                        'UP': sub_upid,
                        'HGNC': sub_hgnc
                    })
        residue = row['Actual_site'][0]
        if len(row['Actual_site']) > 1:
            position = row['Actual_site'][1:]
        else:
            position = None
        mc = ModCondition('phosphorylation', residue, position)
        sub.mods = [mc]
        if ps in antibody_map:
            found = False
            for p in antibody_map[ps]:
                if p.name == sub.name and p.mods[0].residue == residue and \
                    p.mods[0].position == position:
                    found = True
                    break
            if not found:
                antibody_map[ps].append(sub)
        else:
            antibody_map[ps] = [sub]
    return antibody_map