def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    For every gene two negative binomial GLMs are fitted, one on the design
    matrix built with model='H0' and one on the matrix built with
    model='H1', both with the log library sizes as offset. The difference
    of the two deviances is converted into a chi-square upper tail
    probability. Genes whose adjusted dispersion is NaN, and genes for
    which the fit raises PerfectSeparationError, keep a NaN p value.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance

    @return: the same data object, with the (num, 1) array data.pval set
    """
    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')
    # Degrees of freedom: number of extra columns in the H1 design.
    degFree = explanatory1.shape[1] - explanatory0.shape[1]

    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])
    # The offset is identical for every gene, so take the log only once.
    logLibSizes = np.log(librarySizes)
    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    errorCnt = 0
    for i in range(num):
        # Progress report; flush right after writing so that it shows up
        # immediately instead of one iteration late.
        if i % 50 == 0:
            sys.stdout.write('\r%i genes finished...' % i)
            sys.stdout.flush()
        if i + 1 == num:
            sys.stdout.write('\r%i genes finished.\n' % num)
            sys.stdout.flush()

        if opts.dispDiff:
            if np.isnan(data.dispAdjRibo[i]):
                continue
            # One dispersion value per sample: Ribo samples first, then Rna,
            # matching the column order of the response below.
            disp = np.hstack([
                np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                np.repeat(data.dispAdjRna[i], lenSampleRna)
            ])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        try:
            result0 = sm.GLM(
                response, explanatory0,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
            result1 = sm.GLM(
                response, explanatory1,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
        except sm.tools.sm_exceptions.PerfectSeparationError:
            # Leave the p value of this gene at NaN and carry on.
            errorCnt += 1
            continue

        # sf() is the upper tail probability; unlike 1 - cdf() it does not
        # collapse to 0.0 for very small p values.
        pval[i] = chi2.sf(result0.deviance - result1.deviance, degFree)

    data.pval = pval

    if errorCnt:
        sys.stdout.write(
            'Warning: Failed to do test: %i genes. P value set to \'nan\'.\n'
            % errorCnt)

    return data
def estimate_disp(data, opts):
    """
    Create explanatory matrix and estimate dispersion.

    Runs the raw, fit and adjust dispersion steps in that order and
    temporarily saves the data object in the TmpData.pkl file (path prefix
    opts.resPath) after each step.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input arguments to the main TE function
    @type opts: Instance

    @return: the data object returned by the last dispersion step
    """
    def _checkpoint(snapshot):
        # Overwrite the temporary pickle with the current state.
        with open(tmpPickle, 'wb') as handle:
            pickle.dump(snapshot, handle, pickle.HIGHEST_PROTOCOL)

    data.matrix = cm.create_matrix(data, model='H1')
    tmpPickle = opts.resPath + 'TmpData.pkl'

    # Step 1: raw dispersion.
    if opts.dispDiff:
        data = rd.disper_raw(data, opts)
    else:
        data = rd.disper_raw_scalar(data, opts)
    _checkpoint(data)
    print('*' * 25)

    # Step 2: dispersion fit.
    data = fd.disper_fit(data, opts)
    _checkpoint(data)
    print('*' * 25)

    # Step 3: adjusted dispersion.
    if opts.dispDiff:
        data = ad.disper_adj(data, opts)
    else:
        data = ad.disper_adj_scalar(data, opts)
    _checkpoint(data)

    return data
def estimate_disp(data, opts):
    """
    Create explanatory matrix and estimate dispersion.

    The data object is pickled to the TmpData.pkl file (path prefix
    opts.resPath) after the raw step, after the fit step and after the
    adjust step, each time overwriting the previous dump.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input arguments to the main TE function
    @type opts: Instance

    @return: the data object returned by the last dispersion step
    """
    def dumpState(state, target):
        # Write the current state to the temporary pickle file.
        with open(target, 'wb') as sink:
            pickle.dump(state, sink, pickle.HIGHEST_PROTOCOL)

    designH1 = cm.create_matrix(data, model='H1')
    data.matrix = designH1

    resDir = opts.resPath
    tmpFile = resDir + 'TmpData.pkl'

    # Raw dispersion: per-library variant if opts.dispDiff, else scalar.
    data = (rd.disper_raw(data, opts) if opts.dispDiff
            else rd.disper_raw_scalar(data, opts))
    dumpState(data, tmpFile)

    print('*' * 25)

    data = fd.disper_fit(data, opts)
    dumpState(data, tmpFile)

    print('*' * 25)

    # Adjusted dispersion, same variant selection as for the raw step.
    data = (ad.disper_adj(data, opts) if opts.dispDiff
            else ad.disper_adj_scalar(data, opts))
    dumpState(data, tmpFile)

    return data
def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    For every gene two negative binomial GLMs are fitted, one on the design
    matrix built with model='H0' and one on the matrix built with
    model='H1', both with the log library sizes as offset. The difference
    of the two deviances is converted into a chi-square upper tail
    probability. Genes whose adjusted dispersion is NaN, and genes for
    which the fit raises PerfectSeparationError, keep a NaN p value; the
    number of failed fits is reported on stdout at the end.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance

    @return: the same data object, with the (num, 1) array data.pval set
    """
    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')

    # Degrees of freedom: number of extra columns in the H1 design.
    degFree = explanatory1.shape[1] - explanatory0.shape[1]
    if opts.dispDiff:
        # NOTE(review): with separate Ribo/Rna dispersions the degrees of
        # freedom are scaled down by 2.5; the origin of this constant is not
        # visible in this function -- confirm before changing it.
        degFree = degFree / 2.5

    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])
    # The offset is identical for every gene, so take the log only once.
    logLibSizes = np.log(librarySizes)
    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    errorCnt = 0
    for i in range(num):
        if opts.dispDiff:
            if np.isnan(data.dispAdjRibo[i]):
                continue
            # One dispersion value per sample: Ribo samples first, then Rna,
            # matching the column order of the response below.
            disp = np.hstack([
                np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                np.repeat(data.dispAdjRna[i], lenSampleRna)
            ])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        try:
            result0 = sm.GLM(
                response, explanatory0,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
            result1 = sm.GLM(
                response, explanatory1,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
        except sm.tools.sm_exceptions.PerfectSeparationError:
            # Leave the p value of this gene at NaN and carry on.
            errorCnt += 1
            continue

        # sf() is the upper tail probability; unlike 1 - cdf() it does not
        # collapse to 0.0 for very small p values.
        pval[i] = chi2.sf(result0.deviance - result1.deviance, degFree)

    data.pval = pval

    sys.stdout.write(
        'Warning: Failed to do test: %i genes. P value set to \'nan\'.\n'
        % errorCnt)

    return data
def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    Each gene gets two negative binomial GLM fits (design matrices built
    with model='H0' and model='H1', log library sizes as offset); the
    deviance difference is turned into a chi-square upper tail probability.
    A gene keeps a NaN p value when its adjusted dispersion is NaN or when
    the fit raises PerfectSeparationError.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance

    @return: the same data object, with the (num, 1) array data.pval set
    """
    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')
    # Degrees of freedom: number of extra columns in the H1 design.
    degFree = explanatory1.shape[1] - explanatory0.shape[1]

    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])
    # Loop invariant: the offset does not depend on the gene.
    logLibSizes = np.log(librarySizes)
    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    errorCnt = 0
    for i in range(num):
        # Write the progress line and flush it straight away; flushing
        # before the write would delay the display by one iteration.
        if i % 50 == 0:
            sys.stdout.write('\r%i genes finished...' % i)
            sys.stdout.flush()
        if i + 1 == num:
            sys.stdout.write('\r%i genes finished.\n' % num)
            sys.stdout.flush()

        if opts.dispDiff:
            if np.isnan(data.dispAdjRibo[i]):
                continue
            # Per-sample dispersion vector, Ribo samples followed by Rna,
            # in the same order as the stacked counts.
            disp = np.hstack([
                np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                np.repeat(data.dispAdjRna[i], lenSampleRna)
            ])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        try:
            result0 = sm.GLM(
                response, explanatory0,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
            result1 = sm.GLM(
                response, explanatory1,
                family=sm.families.NegativeBinomial(alpha=disp),
                offset=logLibSizes).fit()
        except sm.tools.sm_exceptions.PerfectSeparationError:
            # The p value of this gene stays NaN.
            errorCnt += 1
            continue

        # Upper tail via sf(): 1 - cdf() rounds tiny p values to 0.0.
        pval[i] = chi2.sf(result0.deviance - result1.deviance, degFree)

    data.pval = pval

    if errorCnt:
        sys.stdout.write(
            'Warning: Failed to do test: %i genes. P value set to \'nan\'.\n'
            % errorCnt)

    return data