예제 #1
0
def valid_fastq(
    DATA_ACC="DATA_ACC",
    rawMeta="rawMeta",
):
    """Select the raw-file records of one accession and sanity-check them.

    Both arguments are invoked as zero-argument callables in the body; the
    string defaults are presumably replaced by callables by machinery outside
    this block (TODO confirm against the caller/decorator).

    Returns:
        dict with ``DATA_ACC`` (the value of ``DATA_ACC()``) and ``DF_LIST``,
        a list of ``(group key, sub-frame)`` pairs obtained by grouping on
        ``RUN_ID``, ``SAMPLE_ID``, ``read`` and ``ext``.

    Raises:
        AssertionError: if ``DATA_ACC()`` is ``None``, a read is not 1 or 2,
            an extension is not ``fastq``/``fastq.gz``, or a group does not
            contain exactly 1 or 4 rows.
    """
    _ctf()
    assert DATA_ACC() is not None

    def rawFile__validateChunk(dfc, idKeys=None):
        """Check per-read chunk counts and add combined-filename columns.

        This helper is defined here but not called inside ``valid_fastq``.
        """
        idKeys = ['runID', 'sampleID'] if idKeys is None else idKeys
        idKeys = list(idKeys)
        readKeys = idKeys + ['read']
        dfc = dfc.sort_values(readKeys + ['chunk'])
        # Each (id keys..., read) group must consist of exactly four rows.
        for key, chunkDf in dfc.groupby(readKeys):
            assert len(chunkDf) == 4, key
        firstExt = dfc['ext'].iloc[0]
        if not firstExt.startswith('.'):
            # Give every extension a leading dot.
            dfc['ext'] = dfc['ext'].map(lambda x: '.%s' % x)
        dfc['fnameCombined'] = pyext.df__paste0(dfc, readKeys + ['ext'], sep='_')
        dfc['fnameCombinedSize'] = 0
        return dfc

    # Keep only the rows whose DATAACC equals the requested accession.
    metaTable = rawMeta()
    accessionMask = rawMeta()["DATAACC"] == DATA_ACC()
    rawCurr = metaTable.loc[accessionMask]

    # Append the columns extracted from BASENAME by the baseSpace pattern.
    parsedName = rawCurr['BASENAME'].str.extract(synotil.ptn.baseSpace)
    rawCurr = pd.concat([rawCurr, parsedName], axis=1)
    rawCurr['fname'] = rawCurr['FULL_PATH']

    # Descending sort followed by first(): one record per (BASENAME, DATAACC).
    rawCurr = rawCurr.sort_values(
        ['DATAACC', 'BASENAME', 'SIZE'], ascending=False)
    rawCurr = rawCurr.groupby(['BASENAME', 'DATAACC']).first()
    rawCurr = rawCurr.sort_values(['BASENAME'])

    DF_LIST = list(rawCurr.groupby(['RUN_ID', 'SAMPLE_ID', 'read', 'ext']))
    for (_runId, _sampleId, read, ext), groupDf in DF_LIST:
        assert int(read) in [1, 2], (
            DATA_ACC(),
            read,
        )
        assert ext in ["fastq", "fastq.gz"], (
            DATA_ACC(),
            ext,
        )
        assert len(groupDf) in [1, 4], pyext.ppJson(
            (DATA_ACC(), len(groupDf), groupDf[['FULL_PATH']].values))

    return dict(
        DATA_ACC=DATA_ACC(),
        DF_LIST=DF_LIST,
    )
예제 #2
0
    def venn_diagram(cls, d, context):
        """Plot the overlap of two indexes, title it with a Fisher p-value, save it.

        Args:
            d: job specification; it is first passed through
                ``tree__worker__interpret(d, context)``. Keys read here:
                ``OFNAME`` (required, output figure path), ``index1`` and
                ``index2`` (required), ``axis`` (optional, cast through
                ``cls.dict__castAxis``) and ``index_bkgd`` (optional).
            context: forwarded to ``tree__worker__interpret``.

        Returns:
            The value of ``cls.html__tableLine(OFNAME)``.

        Raises:
            AssertionError: if ``OFNAME`` is missing or empty.
        """
        d = tree__worker__interpret(d, context)
        import pymisca.proba
        d_ax = cls.dict__castAxis(d.get('axis', {}))
        OFNAME = d.get('OFNAME', None)
        assert OFNAME, (pyext.ppJson(d),)

        d['index1'] = pd.Index(d['index1']).dropna()
        d['index2'] = pd.Index(d['index2']).dropna()
        if d.get('index_bkgd', None) is None:
            # Use the explicit set union: the ``|`` operator on Index objects
            # was deprecated as a set operation and is an element-wise
            # logical OR in pandas >= 2.0.
            d['index_bkgd'] = d['index1'].union(d['index2'])
        # NOTE(review): index_bkgd is normalised and stored back in ``d`` but
        # is not passed to the Fisher test below - confirm whether it should be.
        d['index_bkgd'] = pd.Index(d['index_bkgd']).dropna()

        fig, ax = plt.subplots(1, 1, figsize=d_ax['figsize'])

        testResult = pymisca.proba.index__getFisher(cluIndex=d['index1'],
                                                    featIndex=d['index2'])
        pval = '%.3E' % testResult['p']
        # Draw on the axes created above rather than re-fetching the current
        # axes, so the figure that gets saved is the one that is drawn on.
        pyvis.qc_index(d['index1'], d['index2'],
                       xlab=d_ax['xlabel'], ylab=d_ax['ylabel'],
                       silent=0, ax=ax)
        ax.set_title("Fisher exact test: p={pval}".format(pval=pval))

        cls.fig__save(fig, OFNAME)
        return cls.html__tableLine(OFNAME)
예제 #3
0
def job_process(d,context=None):
    """Dump the job spec as JSON, resolve its ``FUNCTION`` entry and run it.

    Args:
        d: job specification; must provide ``OFNAME`` and ``FUNCTION``.
            ``d['FUNCTION']`` is overwritten with the interpreted value.
        context: passed to ``tree__worker__interpret`` and to the resolved
            function. When ``None`` it is taken from
            ``pymisca.header.get__frameDict(level=1)`` (presumably the
            caller's frame variables - TODO confirm).

    Returns:
        Whatever the resolved function returns for ``(d, context)``.
    """
    if context is None:
        context = pymisca.header.get__frameDict(level=1)

    # Record the spec as received in "<OFNAME>.json" before FUNCTION is replaced.
    specText = pyext.ppJson(d)
    specPath = d['OFNAME'] + '.json'
    pyext.printlines([specText], specPath)

    d['FUNCTION'] = tree__worker__interpret(d['FUNCTION'], context)
    return d['FUNCTION'](d, context)
예제 #4
0
    def boxplot(cls,d,context):
        """Box-plot the given datasets, titled with a two-sample t-test, and save.

        Args:
            d: job specification; it is first passed through
                ``tree__worker__interpret(d, context)``. Keys read here:
                ``OFNAME`` (required, output figure path), ``datasets``
                (items providing ``value`` and ``label``), ``axis``
                (optional, cast through ``cls.dict__castAxis``) and ``index``
                (optional row labels to reindex to). The assembled table is
                stored back as ``d['_df']``.
            context: forwarded to ``tree__worker__interpret``.

        Returns:
            The value of ``cls.html__tableLine(OFNAME)``.

        Raises:
            AssertionError: if ``OFNAME`` is missing or empty.
        """
        d = tree__worker__interpret(d, context)
        OFNAME = d.get('OFNAME', None)
        assert OFNAME, (pyext.ppJson(d),)

        d_ax = cls.dict__castAxis(d.get('axis', {}))
        fig, ax = plt.subplots(1, 1, figsize=d_ax['figsize'])

        ylim = d_ax['ylim']
        if ylim:
            ax.set_ylim(ylim)
        ylabel = d_ax['ylabel']
        if ylabel:
            ax.set_ylabel(ylabel)

        # One named Series per dataset; after the transpose each dataset is a column.
        columns = []
        for dataset in d['datasets']:
            columns.append(pd.Series(dataset['value'], name=dataset['label']))
        df = pd.DataFrame(columns).T
        d['_df'] = df

        import scipy.stats

        index = d.get('index', [])
        # len() rather than truthiness: an Index/array has no unambiguous bool.
        if len(index):
            df = df.reindex(index)

        # Independent t-test on the first two columns of the table.
        leftmostPair = df.values.T[:2]
        testResult = scipy.stats.ttest_ind(*leftmostPair)

        titleTemplate = '''
        independent-t-test-between-two-leftmost-samples
        p={testResult.pvalue:.3E}
        N={df.shape[0]}
        '''
        ax.set_title(titleTemplate.format(testResult=testResult, df=df))
        df.boxplot(rot='vertical', ax=ax)

        pyext.fig__save(fig, OFNAME)
        return cls.html__tableLine(OFNAME)
예제 #5
0
 def job_saveFig(
     figs,
     DIR,
     templateFile,
     exts=[
         'png',
     ],
     dpi=160,
 ):
     """Save figures via ``saveFigDict`` and write the result to figures.json.

     Args:
         figs: forwarded to ``saveFigDict``.
         DIR: accepted but not used; ``saveFigDict`` is always called with
             ``DIR='.'``. NOTE(review): confirm this is intentional.
         templateFile: converted with ``str()`` and otherwise unused here.
         exts: forwarded to ``saveFigDict``. NOTE(review): the default is a
             shared list object; it is safe only if it is never mutated.
         dpi: forwarded to ``saveFigDict``.

     Returns:
         The mapping returned by ``saveFigDict``, with its ``'fignames'``
         entry reduced to the names ending in ``.png``.
     """
     templateFile = str(templateFile)
     dfig = saveFigDict(figs, DIR='.', exts=exts, dpi=dpi)

     # Only the PNG outputs are kept in the reported figure list.
     pngNames = []
     for figName in dfig['fignames']:
         if figName.endswith('.png'):
             pngNames.append(figName)
     dfig['fignames'] = pngNames

     pyext.printlines([pyext.ppJson(dfig)], 'figures.json')
     return dfig