def valid_fastq(
        DATA_ACC="DATA_ACC",
        rawMeta="rawMeta",
):
    """Sanity-check the raw FASTQ records registered for one data accession.

    Rows of ``rawMeta()`` whose ``DATAACC`` equals ``DATA_ACC()`` are
    selected, augmented with the columns extracted from ``BASENAME`` by
    ``synotil.ptn.baseSpace``, reduced with ``.first()`` per
    ``(BASENAME, DATAACC)`` after a descending sort on
    ``DATAACC, BASENAME, SIZE``, and finally grouped by
    ``(RUN_ID, SAMPLE_ID, read, ext)``.

    Both arguments are *called* inside the body even though their defaults
    are plain strings.
    NOTE(review): presumably the strings are placeholders resolved to
    callables by the surrounding framework / ``_ctf()`` -- confirm.

    Returns:
        dict with ``DATA_ACC`` (the value of ``DATA_ACC()``) and ``DF_LIST``
        (list of ``(group_key, DataFrame)`` tuples).

    Raises:
        AssertionError: if ``DATA_ACC()`` is None, or a group has a ``read``
            other than 1/2, an ``ext`` other than ``fastq``/``fastq.gz``, or
            a row count other than 1 or 4.
    """
    _ctf()
    assert DATA_ACC() is not None

    def rawFile__validateChunk(dfc, idKeys=None):
        '''Validate a chunked raw-file table and add combined-name columns.

        Not called anywhere inside ``valid_fastq``; kept as in the original.
        '''
        if idKeys is None:
            idKeys = ['runID', 'sampleID']
        idKeys = list(idKeys)

        dfc = dfc.sort_values(idKeys + ['read', 'chunk'])
        # Every (id..., read) combination must consist of exactly 4 rows.
        for groupKey, groupFrame in dfc.groupby(idKeys + ['read']):
            assert len(groupFrame) == 4, groupKey

        # Normalise extensions to carry a leading dot; only the first row
        # is inspected to decide whether the column needs fixing.
        firstExt = dfc['ext'].iloc[0]
        if not firstExt.startswith('.'):
            dfc['ext'] = dfc['ext'].map(lambda x: '.%s' % x)

        pasteColumns = idKeys + ['read', 'ext']
        dfc['fnameCombined'] = pyext.df__paste0(dfc, pasteColumns, sep='_')
        dfc['fnameCombinedSize'] = 0
        return dfc

    # rawMeta() is evaluated twice and before DATA_ACC(), as in the original.
    metaTable = rawMeta()
    accMask = rawMeta()["DATAACC"] == DATA_ACC()
    rawCurr = metaTable.loc[accMask]

    # Append the fields parsed out of the file basename as extra columns.
    parsedName = rawCurr['BASENAME'].str.extract(synotil.ptn.baseSpace)
    rawCurr = pd.concat([rawCurr, parsedName], axis=1)
    rawCurr['fname'] = rawCurr['FULL_PATH']

    # One record per (BASENAME, DATAACC); the descending sort puts the
    # largest SIZE first within each group before .first() is applied.
    ordered = rawCurr.sort_values(
        ['DATAACC', 'BASENAME', 'SIZE'], ascending=False)
    rawCurr = ordered.groupby(['BASENAME', 'DATAACC']).first()
    rawCurr = rawCurr.sort_values(['BASENAME'])

    DF_LIST = list(rawCurr.groupby(['RUN_ID', 'SAMPLE_ID', 'read', 'ext']))
    for (_runId, _sampleId, read, ext), groupFrame in DF_LIST:
        assert int(read) in [1, 2], (
            DATA_ACC(),
            read,
        )
        assert ext in ["fastq", "fastq.gz"], (
            DATA_ACC(),
            ext,
        )
        assert len(groupFrame) in [1, 4], pyext.ppJson(
            (DATA_ACC(), len(groupFrame), groupFrame[['FULL_PATH']].values))

    return dict(
        DATA_ACC=DATA_ACC(),
        DF_LIST=DF_LIST,
    )
def venn_diagram(cls, d, context):
    """Draw an overlap plot of two indices, titled with a Fisher-test p-value.

    ``d`` is first expanded through ``tree__worker__interpret(d, context)``.
    The keys read afterwards are ``OFNAME`` (required output file name),
    ``index1`` / ``index2`` (required, converted to ``pd.Index`` with NaN
    dropped), ``axis`` (optional, passed through ``cls.dict__castAxis``) and
    ``index_bkgd`` (optional; defaults to the union of the two indices).
    ``index_bkgd`` is normalised and stored back into ``d`` but is not passed
    to any call in this function.

    Returns:
        Whatever ``cls.html__tableLine(OFNAME)`` returns.

    Raises:
        AssertionError: if ``OFNAME`` is missing or falsy.
    """
    d = tree__worker__interpret(d, context)
    import pymisca.proba

    d_ax = cls.dict__castAxis(d.get('axis', {}))
    OFNAME = d.get('OFNAME', None)
    assert OFNAME, (pyext.ppJson(d),)

    d['index1'] = pd.Index(d['index1']).dropna()
    d['index2'] = pd.Index(d['index2']).dropna()
    if d.get('index_bkgd', None) is None:
        # Use the explicit set operation: the `|` operator on Index objects
        # is deprecated as a set union and is an element-wise logical OR in
        # newer pandas releases.
        d['index_bkgd'] = d['index1'].union(d['index2'])
    d['index_bkgd'] = pd.Index(d['index_bkgd']).dropna()

    fig, ax = plt.subplots(1, 1, figsize=d_ax['figsize'])
    testResult = pymisca.proba.index__getFisher(
        cluIndex=d['index1'], featIndex=d['index2'])
    pval = '%.3E' % testResult['p']

    # Re-fetch the current axes, as the original did, before drawing.
    ax = plt.gca()
    pyvis.qc_index(
        d['index1'], d['index2'],
        xlab=d_ax['xlabel'], ylab=d_ax['ylabel'],
        silent=0, ax=ax)
    ax.set_title("Fisher exact test: p={pval}".format(pval=pval))

    cls.fig__save(fig, OFNAME)
    return cls.html__tableLine(OFNAME)
def job_process(d, context=None):
    """Dump the job description to JSON, resolve its FUNCTION and run it.

    Args:
        d: mapping describing the job; must contain ``OFNAME`` and
            ``FUNCTION``. ``d['FUNCTION']`` is overwritten with the object
            returned by ``tree__worker__interpret``.
        context: evaluation context handed to ``tree__worker__interpret``
            and to the resolved function. When None it is obtained from
            ``pymisca.header.get__frameDict(level=1)``.

    Returns:
        The result of calling the resolved function as ``func(d, context)``.
    """
    if context is None:
        # Keep this call directly in this function's frame.
        # NOTE(review): level=1 presumably selects a frame relative to the
        # call site -- confirm against get__frameDict.
        context = pymisca.header.get__frameDict(level=1)

    # Record the job as received, before FUNCTION is replaced below.
    jsonLines = [pyext.ppJson(d)]
    jsonPath = d['OFNAME'] + '.json'
    pyext.printlines(jsonLines, jsonPath)

    worker = tree__worker__interpret(d['FUNCTION'], context)
    d['FUNCTION'] = worker
    return d['FUNCTION'](d, context)
def boxplot(cls, d, context):
    """Box-plot the datasets in ``d`` and title the axes with a t-test result.

    ``d`` is first expanded through ``tree__worker__interpret(d, context)``.
    Keys read afterwards: ``OFNAME`` (required output file name),
    ``datasets`` (iterable of mappings with ``value`` and ``label``),
    ``axis`` (optional, passed through ``cls.dict__castAxis``) and ``index``
    (optional; when non-empty the table is reindexed to it). The assembled
    table (one column per dataset) is stored in ``d['_df']``.

    The title reports ``scipy.stats.ttest_ind`` applied to the first two
    columns of the table, plus the number of rows.

    Returns:
        Whatever ``cls.html__tableLine(OFNAME)`` returns.

    Raises:
        AssertionError: if ``OFNAME`` is missing or falsy.
    """
    import scipy.stats

    d = tree__worker__interpret(d, context)
    OFNAME = d.get('OFNAME', None)
    assert OFNAME, (pyext.ppJson(d),)

    axisOpts = cls.dict__castAxis(d.get('axis', {}))

    fig, ax = plt.subplots(1, 1, figsize=axisOpts['figsize'])
    yLimits = axisOpts['ylim']
    if yLimits:
        ax.set_ylim(yLimits)
    yLabel = axisOpts['ylabel']
    if yLabel:
        ax.set_ylabel(yLabel)

    # One named Series per dataset; transposing turns them into columns.
    columns = []
    for entry in d['datasets']:
        columns.append(pd.Series(entry['value'], name=entry['label']))
    d['_df'] = pd.DataFrame(columns).T

    frame = d['_df']
    wanted = d.get('index', [])
    if len(wanted):
        frame = frame.reindex(wanted)

    # Independent two-sample t-test between the two leftmost columns.
    leftmost = frame.values.T[:2]
    ttest = scipy.stats.ttest_ind(*leftmost)

    # NOTE(review): the triple-quoted literal suggests the title was meant
    # to span several lines; its whitespace is reproduced as found.
    titleTemplate = ''' independent-t-test-between-two-leftmost-samples p={testResult.pvalue:.3E} N={df.shape[0]} '''
    ax.set_title(titleTemplate.format(testResult=ttest, df=frame))

    frame.boxplot(rot='vertical', ax=ax)
    pyext.fig__save(fig, OFNAME)
    return cls.html__tableLine(OFNAME)
def job_saveFig(
        figs,
        DIR,
        templateFile,
        exts=None,
        dpi=160,
):
    """Save figures via ``saveFigDict`` and dump the result to figures.json.

    Args:
        figs: figures, forwarded unchanged to ``saveFigDict``.
        DIR: accepted for interface compatibility but not used; figures are
            always saved with ``DIR='.'``.
        templateFile: accepted for interface compatibility but not used.
        exts: extensions forwarded to ``saveFigDict``. Defaults to
            ``['png']``; a fresh list is created on every call so the
            default cannot be shared or mutated across calls.
        dpi: resolution forwarded to ``saveFigDict``.

    Returns:
        The mapping returned by ``saveFigDict``, with ``'fignames'`` filtered
        down to the entries ending in ``'.png'``.
    """
    if exts is None:
        exts = ['png']

    dfig = saveFigDict(figs, DIR='.', exts=exts, dpi=dpi)
    # Only PNG outputs are listed, whatever extensions were requested.
    dfig['fignames'] = [
        name for name in dfig['fignames'] if name.endswith('.png')
    ]

    pyext.printlines([pyext.ppJson(dfig)], 'figures.json')
    return dfig