def qc_cumAvg(
        X,
        axis=0,
        silent=1,
        axs=None,
        nMax=int(1E6),
        ybin=None,
):
    '''Calculate running mean, standard error and |SE / mean| along one axis.

    The array is rearranged so that ``axis`` becomes the leading (sample)
    axis and every other axis is flattened into columns.  Row ``k`` of each
    returned statistic is computed from the first ``k + 1`` samples.

    Parameters
    ----------
    X : array-like
        Input data; converted with ``np.array``.
    axis : int
        Axis of ``X`` along which the samples accumulate.
    silent : bool-like
        When false, the sample size is plotted against |SE / mean| through
        ``pyvis.qc_2var``.
    axs : optional
        Forwarded to ``pyvis.qc_2var`` as ``axs``.  When ``None`` and
        plotting is enabled, a 1x3 figure is created and its first panel is
        placed in the third slot of the forwarded list.
    nMax : int
        Maximum number of flattened columns kept; above this, ``nMax``
        columns are drawn at random (with replacement).
    ybin : optional
        Bins for the plotted y-axis; by default 80 bins spanning
        ``pyutil.span(CV, 99.)``.

    Returns
    -------
    ((M, SE, CV, Lx), axs)
        ``M`` running mean, ``SE`` running standard error, ``CV`` the
        absolute ratio ``SE / M``, ``Lx`` the sample count broadcast to the
        shape of ``SE``; ``axs`` is the (possibly newly created) axes list,
        or the value passed in when nothing was plotted.
    '''
    X = np.array(X)
    # Put the sample axis first, then collapse all remaining axes.
    X = np.moveaxis(X, axis, 0)
    X = np.reshape(X, (len(X), -1))
    if len(X.T) > nMax:
        # Too many columns: keep a random subset to bound the workload.
        ind = np.random.randint(0, len(X.T), nMax)
        X = X[:, ind]
    L = len(X)
    Lx = (1 + np.arange(L))[:, None]

    # The samples now always run along axis 0, whatever `axis` was, so the
    # cumulative sums must use axis 0 (using `axis` here was wrong for any
    # axis != 0).
    Ex = np.cumsum(X, axis=0) / Lx
    Ex2 = np.cumsum(np.square(X), axis=0) / Lx
    M = Ex
    # E[x^2] - E[x]^2 can dip slightly below zero through round-off; clip so
    # that the square root does not turn those entries into NaN.
    VAR = np.maximum(Ex2 - np.square(Ex), 0)
    SD = np.sqrt(VAR)
    SE = SD / np.sqrt(Lx)
    CV = abs(SE / M)

    Lx = np.broadcast_to(Lx, SE.shape)
    if not silent:
        if axs is None:
            _, panels = plt.subplots(1, 3, figsize=[14, 4])
            axs = [None, None, panels[0]]
        if ybin is None:
            ybin = np.linspace(*pyutil.span(CV, 99.), num=80)
        # One bin per integer sample size.
        xbin = np.arange(0.5, L + 1, 1)
        pyvis.qc_2var(
            Lx,
            CV,
            xbin=xbin,
            ybin=ybin,
            ylab=r'$\left| {StdErr} / {Mean} \right |$',
            xlab='sample size',
            axs=axs,
        )
    return (M, SE, CV, Lx), axs
def plotModel(m, X):
    '''Plot the data coloured by predicted cluster, then samples from the model.

    Parameters
    ----------
    m : fitted model
        Must provide ``predict``, ``weights_`` and ``x_post.sample``.
    X : array
        Data whose first two columns (``X.T[0]``, ``X.T[1]``) are plotted.

    Prints the model weights in descending order and draws two
    ``pyvis.qc_2var`` plots: the data with cluster labels, and an equal
    number of points sampled from ``m.x_post``.
    '''
    clu = m.predict(X)
    # Call form of print: identical output on Python 2 for a single
    # argument, and valid syntax on Python 3.
    print(sorted(m.weights_)[::-1])
    pyvis.qc_2var(X.T[0], X.T[1], clu=clu)

    # Second predict call kept from the original; its result is discarded.
    # NOTE(review): presumably needed for a side effect on `m` -- confirm
    # before removing.
    _ = m.predict(X)
    Y = m.x_post.sample(len(X)).eval()
    pyvis.qc_2var(
        Y.T[0],
        Y.T[1],
    )
def qc_Avg(
        C,
        silent=1,
        axis=1,
        **kwargs):
    '''Compute mean, standard deviation and their ratio along one axis.

    Parameters
    ----------
    C : array-like
        Input data; converted with ``np.array``.
    silent : bool-like
        When false, mean is plotted against standard deviation through
        ``pyvis.qc_2var``.
    axis : int
        Axis over which mean and standard deviation are taken.
    **kwargs
        Forwarded to ``pyvis.qc_2var`` when plotting.

    Returns
    -------
    ((MEAN, STD, CV), axs)
        ``CV`` is ``STD / MEAN``.  ``axs`` is whatever ``pyvis.qc_2var``
        returned, or an empty list when ``silent`` is true.
    '''
    C = np.array(C)
    MEAN = C.mean(axis=axis)
    STD = C.std(axis=axis)
    CV = STD / MEAN
    # The histogram range/bins that used to be derived from MEAN here were
    # never used, and computing them raised on empty input; they are gone.
    if not silent:
        axs = pyvis.qc_2var(MEAN, STD, xlab='$E(X)$', ylab='$Std(X)$', **kwargs)
    else:
        axs = []
    return (MEAN, STD, CV), axs
def qc_PCA(df, xi=0, yi=1, xlab=None, ylab=None, **kwargs):
    '''Fit a PCA with ``smod.fit_PCA`` and plot two of its components.

    ``xi`` / ``yi`` select the rows of ``res.trans_data.T`` used for the
    x- and y-axis.  Axis labels default to ``'PC<index>'``.  Extra keyword
    arguments are forwarded to ``pyvis.qc_2var``.

    Returns the fit result together with the value returned by
    ``pyvis.qc_2var``.
    '''
    res = smod.fit_PCA(df)
    comp_x, comp_y = res.trans_data.T[[xi, yi]]
    # Fall back to generic component names when no label was supplied.
    xlab = ('PC%d' % xi) if xlab is None else xlab
    ylab = ('PC%d' % yi) if ylab is None else ylab
    return res, pyvis.qc_2var(comp_x, comp_y, xlab=xlab, ylab=ylab, **kwargs)
# Script fragment: export the selected rows to a temporary BED file, pull
# bigwig signal around them, flag rows by a combined log2 difference and
# write the flagged rows out.  `sel`, `df`, `bwFiles`, `DATA_ACC_LIST`,
# `colGroupMean`, `OUTPUT_BED_FILE` and `OUTPUT_CSV_FILE` must already be
# defined by earlier code.
print(pyext.np.sum(sel))
# NOTE(review): `df` is rebound to the return value of `to_csv`; if `df` is a
# pandas DataFrame that value is None when a path is given.  `df` is not
# used again until it is re-read from 'temp.bed' below.
df = df.loc[sel].to_csv('temp.bed', sep='\t', header=None)
# Bare list expression: evaluated and discarded.  Looks like a leftover
# placeholder marking `bwFiles` as a required input -- TODO confirm.
[bwFiles]
res = synotil.dio.extract_bigwig_multiple(bedFile='temp.bed',
                                          outIndex=DATA_ACC_LIST,
                                          bwFiles=bwFiles,
                                          radius=300,
                                          stepSize=10)

tab = colGroupMean(res)
tab = tab.apply(pyext.log2p1)

# Differences between two pairs of columns of the transformed table.
xs = (tab['189CS10']) - (tab['189CS11'])
ys = tab['192CS17'] - tab['192CS18']
# Flag rows whose two differences sum to more than 2.
clu = (xs + ys) > 2
pyvis.qc_2var(xs, ys, nMax=-1, xlim=[-2, 4], ylim=[-2, 4], clu=clu)

# Re-read the temporary BED file, indexed by its 'acc' column (column kept).
df = pyext.readData('temp.bed', columns=pyext.columns.bed,
                    guess_index=0).set_index('acc', drop=0)
# OUTPUT_FILE = 'OUTPUT/0918-elf3target.bed'
pyext.dir__real(dirname="OUTPUT")
# Bare placeholder expression (see the note on `[bwFiles]`).
[OUTPUT_BED_FILE]
df.loc[clu].to_csv(OUTPUT_BED_FILE, sep='\t', header=None, index=0)
# Bare placeholder expression (see the note on `[bwFiles]`).
[OUTPUT_CSV_FILE]
df.loc[clu].to_csv(OUTPUT_CSV_FILE, sep=',', header=None, index=0)

#### making gene list
FNAME = OUTPUT_BED_FILE
def job__chipTargPaired(
        bwCurr=None,
        bwMeta=None,
        control=None,
        treatment=None,
        xlab=None,
        ylab=None,
        name=None,
        #     bwMeta,
        NCORE=2,
        params__peakBW=None,
        CUTOFF_FC=3.0,
        CUTOFF_CHIPDIFF=0.7,
        innerRadius=100,
):
    '''Select peaks whose signal differs between two tracks and save them.

    Steps, as performed by the body:

    1. Resolve the two track labels (``xlab``, ``ylab``) and the table
       ``bwCurr`` describing them.
    2. Read every peak file in ``bwCurr.npkFile`` and scatter-plot the
       ``per_FC`` column (from ``pyutil.dist2ppf``) against ``FC``.
    3. Keep peaks with ``FC > CUTOFF_FC`` per file, concatenate the results
       with a shell ``cat`` and pass the combined file through
       ``sdio.npk_expandSummit``.
    4. Run ``sjob.figs__peakBW`` on the combined peaks and the
       ``bwCurr.RPKMFile`` tracks.
    5. Flag peaks matching ``val_<ylab> - val_<xlab> > CUTOFF_CHIPDIFF``,
       write the flag table (CSV) and the selected peaks (BED), link the BED
       file under ``output/`` and record the file names in ``FILE.json``.

    Parameters
    ----------
    bwCurr : table, optional
        Must expose ``index``, ``header``, ``npkFile`` and ``RPKMFile``.
        When omitted it is taken as ``bwMeta.reindex([xlab, ylab])``.
    bwMeta : table, optional
        Only used to derive ``bwCurr``.
    control, treatment : optional
        When both are given they override ``xlab`` and ``ylab``.
    xlab, ylab : optional
        Track labels; when either is missing both are unpacked from
        ``bwCurr.index``.
    name : str, optional
        Label used in figure keys and output names; default
        ``'<xlab>-<ylab>'``.
    NCORE : int
        Placed into the default ``params__peakBW``.
    params__peakBW : dict, optional
        Keyword arguments for ``sjob.figs__peakBW``.
    CUTOFF_FC : float
        Threshold used in the per-file ``FC`` filter.
    CUTOFF_CHIPDIFF : float
        Threshold used in the between-track difference query.
    innerRadius : int
        Always written into ``params__peakBW['innerRadius']``.

    Returns
    -------
    (figs, clu)
        ``figs`` is an ordered dict of figures; ``clu`` is the flag table
        with a boolean ``clu`` column.

    NOTE(review): local variable names are part of the behaviour here.
    Several strings are filled in with ``.format(**locals())`` and
    ``locals()`` is handed to ``pyutil.fileDict__save``.  Do not rename
    locals without updating those templates and key lists.
    '''
    figs = pyutil.collections.OrderedDict()

    # control/treatment, when both supplied, take precedence over xlab/ylab.
    if control is not None and treatment is not None:
        xlab, ylab = control, treatment
    # NOTE(review): if the labels are missing and bwCurr is also None, the
    # attribute access below fails on None.
    if xlab is None or ylab is None:
        xlab, ylab = bwCurr.index
    elif bwCurr is None:
        bwCurr = bwMeta.reindex([xlab, ylab])

    if params__peakBW is None:
        params__peakBW = dict(
            outerRadius=500,
            innerRadius=innerRadius,
            NCORE=NCORE,
            outIndex=bwCurr.header,
            #         detailByCHIP = 0,
        )
    # NOTE(review): this writes into a caller-supplied dict in place.
    params__peakBW['innerRadius'] = innerRadius

    if name is None:
        name = '{xlab}-{ylab}'.format(**locals())

    #     bwCurr = bwMeta
    #     bwCurr = bwCurr.loc[[xlab,ylab]]
    #     bwCurr.npkFile
    # On Python 3 `map` is lazy; `dfs` is consumed exactly once by the loop
    # below.
    dfs = map(
        sdio.extract_peak,
        bwCurr.npkFile,
    )

    fig, ax = plt.subplots(1, 1, figsize=[7, 7])
    #     ax = plt.gca()
    # One scatter per peak file, all on the same axes.
    for df in dfs:
        df['per_FC'] = pyutil.dist2ppf(df.FC)
        df.plot.scatter('per_FC', 'FC', ax=ax)

    # Filtered copy of each peak file; the returned values are joined as file
    # names below.
    fnames = [
        pyutil.queryCopy(infile=fname,
                         query='FC>%.3f' % CUTOFF_FC,
                         reader=sdio.extract_peak,
                         inplace=False) for fname in bwCurr.npkFile
    ]
    # dfs[1]
    peakFlat = ' '.join(fnames)
    ofname = '%s-combined.bed' % ('-'.join(bwCurr.index))
    # NOTE(review): file names are interpolated into a shell command without
    # quoting.
    pyutil.shellexec('cat {peakFlat}>{ofname}'.format(**locals()))

    ofname = sdio.npk_expandSummit(fname=ofname, radius=1)

    pyutil.lineCount(ofname)
    # peakFileOrig keeps the pre-selection file name; peakFile is rebound
    # further down.
    peakFileOrig = peakFile = ofname

    res = sjob.figs__peakBW(peakFile=peakFile,
                            bwFiles=bwCurr.RPKMFile,
                            name=name,
                            **params__peakBW)
    figs.update(res[0])

    bwTrack, bwAvg = res[1]
    # Rename the bwAvg columns through the mapper built from bwCurr's
    # 'header' and 'index'.
    bwAvg.columns = bwAvg.columns.map(
        pyutil.df2mapper(bwCurr, 'header', 'index').get)
    # .set_index('RPKMFile').loc[bwAvg.columns].
    #     bwAvg.columns = bwCurr.index

    xs, ys = bwAvg[[xlab, ylab]].values.T
    #     clu = None
    #     peakIndex = pyutil.df__pad(bwAvg).query(query).index
    # presumably df__pad yields the `val_<label>` columns the query refers
    # to -- TODO confirm.
    clu = pd.DataFrame(pyutil.df__pad(bwAvg))
    query = ' val_{ylab} - val_{xlab} > {CUTOFF_CHIPDIFF} '.format(**locals())
    qsans = pyutil.sanitise_query(query)

    peakIndex = clu.query(query).index
    clu['clu'] = clu.eval('index in @peakIndex')

    stats = sdio.extract_peak(peakFile).set_index('acc', drop=0)
    # Left-hand side of the query (text before '>') evaluated as a column.
    stats['CHIPDIFF'] = clu.eval(query.split('>')[0])

    pyvis.qc_2var(xs, ys, clu=clu.clu, xlab=xlab, ylab=ylab)
    figs['scatterPlot__%s' % name] = plt.gcf()

    cluFile = ofname = qsans + '.csv'
    clu.to_csv(ofname)
    print(ofname, pyutil.lineCount(ofname))

    peakBase = pyutil.getBname(peakFile)
    ofname = '{peakBase}-{qsans}.bed'.format(**locals())
    # peakFile now holds whatever to_tsv returns for the selected peaks.
    peakFile = pyutil.to_tsv(stats.reindex(peakIndex), ofname)
    pyutil.shellexec('mkdir -p output/')
    pyutil.file__link(ofname, 'output/%s.bed' % name, force=True)

    #     peakFile = pyutil.queryCopy(peakFile,
    #                                 query='acc in @peakIndex',
    #                                 reader=sdio.extract_peak,
    #                                 peakIndex=peakIndex,
    #                                 )
    #     peakFile = '{peakFile}-{qsans}.bed'
    #     pyutil.fileDict__main(ofname='FILE.json',
    #                           **pyutil.dictFilter(locals(),
    #                                               keys=['cluFile','peakFile',
    #                                                     'peakFileOrig']
    #                                               ))
    # The listed keys are looked up by name in locals().
    pyutil.fileDict__save(d=locals(),
                          keys=['cluFile', 'peakFile', 'peakFileOrig'],
                          fname='FILE.json')
    return figs, clu
# In[ ]: m = mym.GMM_VIMAP(D=2) X = np.random.random(size=(500, 2)) m.fit(X) plotModel(m, X) m_diag = m # In[ ]: X = toyData(K=3) # print X.shape pyvis.qc_2var( X.T[0], X.T[1], ) # In[ ]: m = mym.GMM_VIMAP(D=2, K=3, name='testB') m.fit(X) plotModel(m, X) # In[ ]: mi = 5 m = mym.GMMLRP_VIMAP(name='t%d' % mi, D=2, K=3).init_model() m.fit(X=X) plotModel(m, X)
ax = axs[i] plt.sca(ax) prof.plot(xticks=range(len(tdf.columns)), rot='vertical') i += 1 ax = axs[i] plt.sca(ax) per_score = pd.Series(per_score, tdf.index) clu = per_score > 0.95 ##### adding diagnostic plots xs, ys = tdf.summary.MSQ, score pyvis.qc_2var(xs, ys, axs=[None, ax, None, None], clu=clu, nMax=len(clu)) pyvis.add_text(xs, ys, keyDF.BioName, ax=ax) figs['qc_TempReponse'] = fig stats['tempResponsive'] = clu scores['tempResponsive'] = per_score ###------------------------------------------- ############################################## ############################################## ###------------------------------------------- _ = ''' For each gene derive its PIF7-knockout responsiveness by calculating at its dot-product similarity with a set of