Example #1
def combine_corrs(e1, e2, allprots, combine_func, default_val=None):
    # we combine the symmetric correlation matrices using the specified
    # element-wise function. function examples: max, sum
    # we use the specified ordering of elements in allprots
    default_val = default_val if default_val else (-1 if "max" in combine_func.__name__ else 0)
    nprots = len(allprots)
    corr = np.matrix(np.zeros((nprots, nprots)))
    dprots1 = ut.list_inv_to_dict(e1.prots)
    dprots2 = ut.list_inv_to_dict(e2.prots)
    for row, p1 in enumerate(allprots):
        for col, p2 in enumerate(allprots):
            val1 = e1.corr[dprots1[p1], dprots1[p2]] if p1 in dprots1 and p2 in dprots1 else default_val
            val2 = e2.corr[dprots2[p1], dprots2[p2]] if p1 in dprots2 and p2 in dprots2 else default_val
            corr[row, col] = combine_func(val1, val2)
    return corr
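Every snippet on this page leans on ut.list_inv_to_dict, which is never shown here. From the way its result is used (e.g. dprots1[p1] indexing a matrix row), it evidently inverts a list into an element-to-index dict; a minimal sketch of that presumed behavior:

def list_inv_to_dict(lst):
    # Presumed equivalent of ut.list_inv_to_dict: map each list element to its position.
    return dict((item, i) for i, item in enumerate(lst))

# list_inv_to_dict(["A", "B", "C"]) -> {"A": 0, "B": 1, "C": 2}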
Example #2
def prot_counts_pep2prots(peplist, only_uniques, pep2prots):
    """
    - pep2prots: use supplied dict of {peptide: set(proteinids)} instead of the
      protein ids on the lines in the pep_list file, in which case sum up the
      counts for a peptide-spectral combination. 
    """
    assert only_uniques, "Only handles only_uniques=True so far."
    exclude_peps = (set([pep for pep,prots in pep2prots.items() if len(prots)>1])
            if only_uniques else set([]))
    print "%s non-unique peptides to exclude." % len(exclude_peps)
    pep_samp_counts = defaultdict(float)
    for _,sample,pep,count in peplist:
        if pep not in exclude_peps:
            pep_samp_counts[(pep,sample)] += float(count)
    # Currently ignoring peptides without a mapping.
    prots = sorted(list(reduce(set.union, [pep2prots[pep] for (pep,_),_ in
        pep_samp_counts.items() if pep in pep2prots])))
    samples = sorted(list(set(ut.i1(peplist))))
    print "%s unique proteins. %s samples." % (len(prots), len(samples))
    dprots, dsamples = [ut.list_inv_to_dict(lst) for lst in prots, samples]
    counts = np.zeros((len(prots), len(samples)), dtype='float32')
    for (pep,sample),count in pep_samp_counts.items():
        if pep in pep2prots: # Currently ignoring peptides without a mapping.
            assert len(pep2prots[pep])==1, "Non-unique peptide found"
            counts[dprots[list(pep2prots[pep])[0]], dsamples[sample]] += count
    totals = counts.sum(axis=1)
    nonzero = totals > 0
    prots, totals = [list(np.array(lst)[nonzero]) for lst in prots, totals]
    counts = counts[nonzero,:]
    return prots, samples, counts, totals
Example #3
def __init__(self, filename, sp_base="Hs", norm_rows=False, norm_cols=False):
    e = load_elution(filename)
    self.prots = e.prots
    self.filename = e.filename
    self.normarr = ut.normalize_fracs(e.mat, norm_rows=norm_rows, norm_cols=norm_cols)
    self.pinv = ut.list_inv_to_dict(e.prots)
    sp_target = ut.shortname(e.filename)[:2]
    self.baseid2inds = sc.orth_indices(sp_base, sp_target, e.prots, False)
Example #4
def ppis_add_sp_ppis(ppis, arrfeats):
    idict = ut.list_inv_to_dict(((r[0],r[1]) for r in arrfeats))
    newppis = []
    cols = [n for n in arrfeats.dtype.names if n[2:].startswith('_ppi_score')]
    for p in ppis:
        index = idict[(p[0],p[1])]
        sp_ppis = arrfeats[index:index+1][cols][0]
        newppis.append(tuple(p) + tuple(sp_ppis))
    return newppis, cols
Example #5
def elut_gene_maxes(elutfs, geneids):
    d = {}
    for f in elutfs:
        e = el.load_elution(f)
        prots_inv = ut.list_inv_to_dict(e.prots)
        for gid in geneids:
            if gid in prots_inv:
                d.setdefault(f,{})[gid] = np.max(e.mat[prots_inv[gid]])
    return d
Example #6
def ppis_add_splist(ppis, arrfeats, cutoff):
    idict = ut.list_inv_to_dict(((r[0],r[1]) for r in arrfeats))
    newppis = []
    print "Cutoff: %s" % cutoff
    for p in ppis:
        index = idict[(p[0],p[1])]
        splist = fe.passing_species_separate(arrfeats[index:index+1], cutoff)
        newppis.append(tuple(p) + tuple(splist))
    sps = fe.species_list(arrfeats.dtype.names[3:])
    return newppis, sps
Example #7
def filter_matching_elution(edata, efilter, remove_data_ending=".map"):
    """
    Use efilter as a mask on edata, zeroing entries of edata wherever the corresponding efilter entry is 0.
    """
    newmat = np.matrix(np.zeros(edata.mat.shape))
    # First create the column-matched array from filter to data
    inv_fracs = ut.list_inv_to_dict(efilter.fractions)
    data_fracs = [f.replace(remove_data_ending, "") for f in edata.fractions]
    arrfilt = np.zeros((efilter.mat.shape[0], edata.mat.shape[1]))
    for i, f in enumerate(data_fracs):
        arrfilt[:, i] = np.asarray(efilter.mat)[:, inv_fracs[f]] if f in inv_fracs else np.zeros(efilter.mat.shape[0])
    # Then go row-by-row
    filter_map = ut.list_inv_to_dict(efilter.prots)
    for i, g in enumerate(edata.prots):
        if g in filter_map:
            newmat[i, :] = np.asarray(edata.mat)[i, :] * (arrfilt[filter_map[g], :] > 0).astype(int)
        else:
            newmat[i, :] = np.zeros(edata.mat.shape[1])
    return newmat
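The heart of filter_matching_elution is the per-row arithmetic: each data row is multiplied by a 0/1 indicator derived from the matching filter row. A small standalone NumPy illustration of just that step, on toy values:

import numpy as np

data_row = np.array([3.0, 0.5, 2.0, 1.0])
filter_row = np.array([1.0, 0.0, 4.0, 0.0])
masked = data_row * (filter_row > 0).astype(int)
# masked -> array([3., 0., 2., 0.]); entries are zeroed wherever the filter is 0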
Example #8
def complex_arr(cxs, prots):
    arr = np.zeros((len(prots),len(prots)))
    ints_dict = co.corum_ints_duped([(i,ps) for i,ps in enumerate(cxs)])
    p_inds = ut.list_inv_to_dict(prots)
    for p,partners in ints_dict.items():
        if p in p_inds:
            for partner in partners:
                if partner in p_inds:
                    arr[p_inds[p], p_inds[partner]] = 1
    return arr
Example #9
def profiles_cxs(e, cxs, **kwargs):
    # blue/yellow/red map: 'jet'
    defaults = {'interpolation': 'nearest', 'cmap':'hot', 'vmin':1}
    kwargs = ut.dict_set_defaults(kwargs, defaults)
    arr = np.array(e.mat)
    dinds = ut.list_inv_to_dict(e.prots)
    useps = [p for c in cxs for p in c]
    useinds = [dinds[p] for p in useps if p in dinds]
    vals = np.clip(np.log2(arr[useinds,:]),0,100)
    imshow(vals, **kwargs)
    return vals
Example #10
def orth_indices(sp_base, sp_target, prot_list, remove_multi_base):
    """
    Using appropriate orthology, take a list of target species gene ids
    (corresponding to rows in the target species score matrix), and
    return a dict mapping base species gene ids to (sets of) indices in that
    list and therefore to (sets of) row/column indices in the square
    interaction score matrix. 
    """
    targ2inds = dict([(k,set([v]))
                      for k,v in ut.list_inv_to_dict(prot_list).items()])
    if sp_base == sp_target:
        return targ2inds
    else:
        base2targ = orth.odict(sp_base, sp_target)
        if remove_multi_base:
            base2targ = remove_multi_keys(base2targ)
        base2inds = ut.compose_dict_sets(base2targ, targ2inds)
        base2inds = dict([(k,v) for k,v in base2inds.items() if len(v)>0])
        return base2inds
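The docstring's two-step mapping (base gene id -> target gene ids -> row indices) boils down to composing two dicts of sets. A rough standalone sketch of that composition with toy data; orth.odict and ut.compose_dict_sets are project helpers, so their exact behavior here is an assumption:

# stand-in for orth.odict(sp_base, sp_target): base id -> set of target ids
base2targ = {"BASE1": set(["T1", "T2"]), "BASE2": set(["T9"])}
# element -> index over the target protein list, like targ2inds above
targ2inds = {"T1": set([0]), "T2": set([3]), "T3": set([5])}

# presumed effect of ut.compose_dict_sets: union the index sets of all mapped targets
base2inds = dict((b, set().union(*[targ2inds.get(t, set()) for t in targs]))
                 for b, targs in base2targ.items())
base2inds = dict((k, v) for k, v in base2inds.items() if len(v) > 0)
# base2inds -> {"BASE1": set([0, 3])}; "BASE2" drops out because "T9" has no index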
Example #11
def prot_counts(peplist, only_uniques):
    """
    Deprecated 20130628. pep_list doesn't seem consistent for protein
    assignments.
    Mainly returns the counts array with protein quantitations.
    - exclude_peps: set of peptides to exclude, probably from non_unique_peps.
    """
    print "**Deprecated 20130628**"
    exclude_peps = non_unique_peps(peplist) if only_uniques else set([])
    prots,samples = [sorted(list(set(lst))) 
            for lst in zip(*[i[:2] for i in peplist])]
    dprots, dsamples = [ut.list_inv_to_dict(lst) for lst in prots, samples]
    counts = np.zeros((len(prots), len(samples)), dtype='float32')
    for prot, sample, pep, count in peplist:
        if pep not in exclude_peps:
            counts[dprots[prot],dsamples[sample]] += float(count)
    totals = counts.sum(axis=1)
    nonzero = totals > 0
    prots, totals = [list(np.array(lst)[nonzero]) for lst in prots, totals]
    counts = counts[nonzero,:]
    return prots, samples, counts, totals
Example #12
def pair2ind(items):
    return ut.list_inv_to_dict(((x[0],x[1]) for x in items))