Ejemplo n.º 1
0
def single_array(gids, unnorm_eluts, sp, min_count=1,
        remove_multi_base=False, norm_rows=False):
    """
    Build one wide array of log-scaled elution profiles, one row per gid.

    gids: ids looked up in each elution's baseid2inds mapping; row i of the
        result corresponds to gids[i].
    unnorm_eluts: [el.NormElut(f, sp=sp, norm_cols=False, norm_rows=False) for f in fs]
    sp, min_count: passed through to elutions_containing_prots to select
        which elutions are used.
    remove_multi_base: accepted for interface compatibility; not used here.
    norm_rows: forwarded to ut.normalize_fracs for every elution.

    Returns an array of shape (len(gids), total number of columns over the
    selected elutions).  Within each elution's column block the values are
    clip(log2(100 * value / block_max), 0, 10); rows for gids absent from an
    elution stay 0.  When a gid maps to several rows of an elution, the
    column-wise maximum of those rows is used.
    """
    use_eluts = elutions_containing_prots(unnorm_eluts, sp, gids, min_count)
    # Single formatted string: same output under Python 2's print statement
    # and also valid as a Python 3 call.
    print("%s eluts with proteins" % len(use_eluts))
    ncols = sum(e.normarr.shape[1] for e in use_eluts)
    bigarr = np.zeros((len(gids), ncols))
    startcol = 0
    for e in use_eluts:
        freqarr = ut.normalize_fracs(e.normarr, norm_rows=norm_rows)
        nfracs = freqarr.shape[1]
        temparr = np.zeros((len(gids), nfracs))
        for i, gid in enumerate(gids):
            if gid in e.baseid2inds:
                inds = list(e.baseid2inds[gid])
                temparr[i, :] = np.max(freqarr[inds, :], axis=0)
        # np.max raises on a zero-size array, so treat that as "no signal".
        frac_max = np.max(temparr) if temparr.size else 0
        # A block whose maximum is 0 would give 0/0 = NaN everywhere (NaN
        # survives np.clip); leave such a block at its initial zeros instead.
        if frac_max != 0:
            # log2(0) = -inf is expected for absent signal and is clipped to
            # 0 below, so silence the divide warning it would emit.
            with np.errstate(divide='ignore'):
                scaled = np.log2(temparr * 100 / frac_max)
            bigarr[:, startcol:startcol + nfracs] = np.clip(scaled, 0, 10)
        startcol += nfracs
    return bigarr
Ejemplo n.º 2
0
def pdist_score(mat, metric='euclidean', norm_rows=True,
        norm_cols=True):
    """
    Score every pair of rows in mat as 1 minus their pairwise distance.

    mat is first passed through ut.normalize_fracs with the given
    norm_rows / norm_cols flags, then scipy's pdist is applied with the
    requested metric.  NaN distances are replaced via np.nan_to_num before
    the subtraction.

    Returns the square matrix 1 - distance.
    """
    normalized = ut.normalize_fracs(mat, norm_rows, norm_cols)
    condensed = spatial.distance.pdist(normalized, metric=metric)
    # Expand the condensed pairwise vector to a full square matrix.
    square = spatial.distance.squareform(condensed)
    return 1 - np.nan_to_num(square)
Ejemplo n.º 3
0
def pdist_score(mat, metric='euclidean', norm_rows=True,
        norm_cols=True):
    """
    Return a square score matrix: 1 - pairwise row distance of mat.

    The input is normalized with ut.normalize_fracs(mat, norm_rows,
    norm_cols); distances come from scipy.spatial.distance.pdist using
    `metric`.  np.nan_to_num is applied to the distance matrix before it is
    subtracted from 1.
    """
    pairwise = spatial.distance.squareform(
            spatial.distance.pdist(
                ut.normalize_fracs(mat, norm_rows, norm_cols),
                metric=metric))
    cleaned = np.nan_to_num(pairwise)
    return 1 - cleaned
Ejemplo n.º 4
0
 def __init__(self, filename, sp_base="Hs", norm_rows=False, norm_cols=False):
     """
     Load the elution in `filename` and store its normalized array.

     Attributes set: prots and filename (copied from the loaded elution),
     normarr (ut.normalize_fracs of the elution matrix with the given
     norm_rows / norm_cols), pinv (ut.list_inv_to_dict of prots) and
     baseid2inds (sc.orth_indices from sp_base to the target species).
     """
     elut = load_elution(filename)
     self.filename = elut.filename
     self.prots = elut.prots
     self.normarr = ut.normalize_fracs(
             elut.mat, norm_rows=norm_rows, norm_cols=norm_cols)
     self.pinv = ut.list_inv_to_dict(elut.prots)
     # Target species code: first two characters of the file's short name.
     target_sp = ut.shortname(elut.filename)[:2]
     self.baseid2inds = sc.orth_indices(sp_base, target_sp, elut.prots, False)
Ejemplo n.º 5
0
 def __init__(self, filename, sp_base='Hs', norm_rows=False, norm_cols=False):
     """
     Read an elution file and keep its (optionally normalized) matrix.

     Sets prots, filename, normarr, pinv and baseid2inds on the instance.
     baseid2inds is whatever sc.orth_indices returns for sp_base versus the
     species code taken from the first two characters of the file's short
     name.
     """
     loaded = load_elution(filename)
     proteins = loaded.prots
     self.prots = proteins
     self.filename = loaded.filename
     self.normarr = ut.normalize_fracs(loaded.mat, norm_rows=norm_rows,
             norm_cols=norm_cols)
     self.pinv = ut.list_inv_to_dict(proteins)
     short = ut.shortname(loaded.filename)
     self.baseid2inds = sc.orth_indices(sp_base, short[:2], proteins, False)
Ejemplo n.º 6
0
def plot_bigprofiles(prots, pids, unnorm_eluts, sp='Hs', min_count=1,
        remove_multi_base=False, gt=None, eluts_per_plot=10,
        do_cluster=True, label_trans=None, do_plot_tree=False,
        rename_fracs=None, colors=None, **kwargs):
    """
    Plot elution profiles for a set of proteins, stacked in subplots.

    supply EITHER prots OR protids, set other to None
    prots: protein names; when given, they are converted to ids through
        gt.name2id (so gt must be provided in that case).
    pids: protein ids, used directly when prots is None.
    unnorm_eluts: [el.NormElut(f, sp=sp, norm_cols=False, norm_rows=False) for f in fs]
    sp, min_count: passed to elutions_containing_prots to pick elutions.
    remove_multi_base: accepted for interface compatibility; not used here.
    gt: object with name2id / id2name mappings; if None, rows are labeled
        with the ids themselves.
    eluts_per_plot: number of elutions drawn in each subplot.
    do_cluster, do_plot_tree, **kwargs: when do_cluster is true the ids are
        re-ordered by cluster_ids(..., do_plot=do_plot_tree, **kwargs).
    label_trans: optional dict translating display labels.
    rename_fracs: optional dict translating elution short names.
    colors: optional sequence forwarded to plot_big_single; presumably one
        entry per protein -- TODO confirm against plot_big_single.

    The caller's pids, prots and colors sequences are not modified.

    Returns the number of subplots drawn.
    """
    import plotting as pl
    if prots is not None:
        pids = [gt.name2id[p] for p in prots]
    if do_cluster:
        print("clustering")
        pids = cluster_ids(pids, unnorm_eluts, sp, gt=gt, do_plot=do_plot_tree,
                **kwargs)
    if gt is not None:
        # Re-order names to match pids.  Ids without a name fall back to the
        # id itself so the label list stays the same length as pids and
        # every label lines up with its row.
        prots = [gt.id2name[pid] if pid in gt.id2name else pid
                for pid in pids]
    else:
        prots = pids
        print("No gene names provided--labeling with ids.")
    if label_trans:
        print("Translating names for display.")
        # Translate displayed names from base ids according to provided dict
        prots = [label_trans.get(p, p) for p in prots]
    # Put them top to bottom.  Reversed *copies* are used: when gt is None,
    # prots and pids are the same list, so reversing both in place would
    # cancel out (while colors would still flip), and in-place reversal
    # would also alter the caller's lists.
    prots = list(prots)[::-1]
    pids = list(pids)[::-1]
    if colors is not None:
        colors = list(colors)[::-1]
    print("%s proteins" % len(pids))
    use_eluts = elutions_containing_prots(unnorm_eluts, sp, pids, min_count)
    # float() forces true division so a final, partially filled plot is not
    # dropped when the module runs with Python 2 integer division.
    nplots = int(np.ceil(len(use_eluts) / float(eluts_per_plot)))
    maxfracs = 0
    for iplot in range(nplots):
        pl.subplot(nplots, 1, iplot+1)
        plot_eluts = use_eluts[iplot*eluts_per_plot: (iplot+1)*eluts_per_plot]
        frac_names = [ut.shortname(e.filename) for e in plot_eluts]
        if rename_fracs:
            frac_names = [rename_fracs.get(n, n) for n in frac_names]
        # startcols[k] is the first column of the k-th elution in this
        # subplot; the last entry is the total width drawn so far.
        startcols = [0]
        for e in plot_eluts:
            freqarr = ut.normalize_fracs(e.normarr, norm_rows=False)
            # Largest value among the rows of this elution that belong to
            # the plotted proteins.
            protsmax = max([np.max(freqarr[r]) for p in pids if p in
                e.baseid2inds for r in e.baseid2inds[p]])
            plot_big_single(freqarr, pids, e.baseid2inds, protsmax,
                    startcols[-1], colors=colors)
            startcols.append(startcols[-1]+freqarr.shape[1])
        label_ys(prots)
        label_xs(startcols, frac_names)
        pl.grid(False)
        maxfracs = max(maxfracs, startcols[-1])
    # Give every subplot the same x range so columns line up across plots.
    for iplot in range(nplots):
        pl.subplot(nplots, 1, iplot+1)
        pl.xlim(0, maxfracs)
    if prots:
        # 5.0 keeps the spacing fractional under Python 2 integer division;
        # the guard avoids dividing by zero for an empty protein list.
        pl.subplots_adjust(hspace=5.0/len(prots))
    return nplots