def anova2(values, factor1, factor2,
           factor1name="factor1", factor2name="factor2",
           interaction=True):
    """Python wrapper for a two-way ANOVA in R with an optional
    interaction term (default=True)."""
    # build a data frame for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    dataframe["factor2"] = factor2
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    r("df$factor2 <- factor( df$factor2 )")
    # run the model ("*" includes the interaction term, "+" excludes it)
    results = r("anova( lm( df$feature ~ df$factor1 %s df$factor2 ) )"
                % ("*" if interaction else "+"))
    r("rm( list=ls() )")
    # convert R results to a table; the keys of `results` are the ANOVA
    # table's column names, so iterate over them as column heads
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, factor2name]
    rowheads += ["int term", "error"] if interaction else ["error"]
    ndictData = {}
    for colhead in results.keys():
        for index, name in enumerate(rowheads):
            ndictData.setdefault(name, {})[colhead] = results[colhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
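# A minimal usage sketch for anova2 (not from the original source): it
# assumes rpy's `r` object and the `nesteddict2table` helper are imported
# at module level, and the data below are purely illustrative.
values  = [4.1, 3.9, 6.2, 6.0, 5.1, 5.3, 7.4, 7.2]   # hypothetical response
factor1 = ["a", "a", "b", "b", "a", "a", "b", "b"]   # hypothetical treatment
factor2 = ["x", "x", "y", "y", "x", "x", "y", "y"]   # hypothetical batch
# two-way ANOVA with an interaction term; returns a zopy-style table
table = anova2(values, factor1, factor2,
               factor1name="treatment", factor2name="batch")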
def r_to_str(robj):
    """Return the ASCII save() representation of an R object as a string."""
    import os
    from rpy import r
    from tempfile import mktemp
    # NOTE: mktemp() only generates a name (it is race-prone); the file
    # itself is created by R's save() below
    tmpfile = mktemp()
    # logging.info('Tmpfile: %s' % tmpfile)
    try:
        r.assign('tmpobj', robj)
        r('save(tmpobj, file="%s", ascii=TRUE)' % tmpfile)
        return open(tmpfile).read()
    finally:
        if os.access(tmpfile, os.R_OK):
            os.remove(tmpfile)
def platt_opts(light, params):
    """
    Evaluate the Platt model at the given PAR levels.

    Parameters
    ----------
    light : arr
        Generally PAR values. Photosynthetically Active Radiation
        interferes with primary production; 'light' is this parameter.
    params : arr
        Values of (alpha, Beta, etrmax).

    Returns
    -------
    opts : arr
        Values of the Platt curve evaluated with `params` at the given
        PAR levels.
    """
    opts = []
    r.assign("light", light[~np.isnan(light)])
    r.assign("params", params)
    # if opt == None:
    #     r.assign("opt", light[~np.isnan(light)])
    # else:
    #     r.assign("opt", opt[~np.isnan(opt)])
    # if ini == None:
    #     r.assign('ini', [0.4,1.5,1500])
    # else:
    #     r.assign('ini', np.array(ini))
    # op, platt_param = platt(light, etr, ini=ini)
    # r.assign('platt_param', platt_param)
    min_opt = r("""
    min_opt <- function(light, params){
        alpha <- params[1]
        Beta  <- params[2]
        Ps    <- params[3]
        return( Ps*(1-exp(-alpha*light/Ps))*exp(-Beta*light/Ps) )
    }""")
    opts = np.append(opts, r('min_opt(light, params)'))
    return opts
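# Hypothetical usage of platt_opts (not from the original source): evaluate
# the Platt curve at a set of PAR levels given previously fitted
# (alpha, Beta, etrmax) parameters; all numbers below are illustrative.
import numpy as np

par_levels    = np.array([0., 50., 100., 200., 400., 800.])
fitted_params = [0.4, 0.05, 80.0]   # illustrative (alpha, Beta, Ps)
modelled_etr  = platt_opts(par_levels, fitted_params)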
def anova(values, factor1, factor1name="factor1"):
    """Python wrapper for a one-way ANOVA in R."""
    # build a data frame for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    # run the model
    results = r("anova( lm( df$feature ~ df$factor1 ) )")
    r("rm( list=ls() )")
    # convert R results to a table; as in anova2, the keys of `results`
    # are the ANOVA table's column names
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, "error"]
    ndictData = {}
    for colhead in results.keys():
        for index, name in enumerate(rowheads):
            ndictData.setdefault(name, {})[colhead] = results[colhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
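# One-way counterpart of the anova2 sketch above, under the same
# assumptions; the data are again purely illustrative.
values = [4.1, 3.9, 6.2, 6.0, 5.1, 5.3]
groups = ["a", "a", "b", "b", "a", "b"]
table = anova(values, groups, factor1name="treatment")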
def plot(self, hardcopy=None):

    if hardcopy:
        R.png(hardcopy, width=1024, height=768, type="cairo")

    R.require('qvalue')

    # build a qvalue object from the stored results and plot it
    R.assign("pval", self.mPValues)
    R.assign("pi0", self.mPi0)
    R.assign("qval", self.mQValues)
    R.assign("lambda", self.mLambda)
    R("""qobj <- list(pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)""")
    R("""class(qobj) <- "qvalue" """)

    R("""qplot(qobj)""")

    if hardcopy:
        R.dev_off()
def smooth_data(data):
    sample_data = data[0]
    window_size = data[1]
    for rep_num in range(sample_data.get_number_of_replicates()):
        for chrom in sample_data.get_chromosome_list():
            met_manager = sample_data.get_manager_of_chrom(chrom)
            pos = []
            m = []
            cov = []
            for methyl_c in met_manager:
                pos.append(methyl_c.position)
                m.append(methyl_c.get_methylrate(rep_num))
                cov.append(methyl_c.get_coverage(rep_num))
            r.warnings()
            r.library("locfit")
            r.assign("pos", pos)
            r.assign("m", m)
            r.assign("cov", cov)
            r.assign("h", window_size)
            r("posm=data.frame(pos,m)")
            # coverage-weighted local-likelihood fit of methylation rate
            r("fit=locfit(m~lp(pos,h=h),data=posm,maxk=1000000,weights=cov)")
            r("pp=preplot(fit,where='data',band='local',newdata=data.frame(pos=pos))")
            fit = r("pp")["fit"]
            xev = r("unlist(pp$xev$xev)")
            for i, each in enumerate(xev):
                position = int(each[0])
                methyl_c = met_manager.get_methyl_c(position)
                if methyl_c:
                    # clamp the smoothed rate to [0, 1]
                    if fit[i] >= 1:
                        smoothedrate = 1
                    elif fit[i] <= 0:
                        smoothedrate = 0
                    else:
                        smoothedrate = fit[i]
                    methyl_c.update_methylrate(rep_num, smoothedrate)
                else:
                    sys.stderr.write("methyl_c doesn't exist at %d" % position)
                    sys.exit(1)
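# A stripped-down sketch of the locfit smoothing step above, run on plain
# lists instead of the sample_data/met_manager objects (which live
# elsewhere in this codebase); positions, rates, and depths are made up.
from rpy import r

positions = [100, 200, 300, 400, 500]       # hypothetical CpG positions
rates     = [0.10, 0.90, 0.80, 0.20, 0.50]  # hypothetical methylation rates
depths    = [10, 25, 30, 8, 12]             # hypothetical read coverage

r.library("locfit")
r.assign("pos", positions)
r.assign("m", rates)
r.assign("cov", depths)
r.assign("h", 1000)  # smoothing window in bp, illustrative
r("fit = locfit(m ~ lp(pos, h=h), data=data.frame(pos, m), weights=cov)")
smoothed = r("predict(fit, newdata=data.frame(pos=pos))")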
def smoothPValues(pvalues,
                  vlambda=numpy.arange(0, 0.95, 0.05),
                  smooth_df=3,
                  smooth_log_pi0=False):

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")
    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            "if length of vlambda is greater than 1, you need at least 4 values.")
    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)
    # estimate pi0(lambda) at each candidate lambda
    pi0 = numpy.zeros(len(vlambda), numpy.float)
    for i in range(len(vlambda)):
        pi0[i] = numpy.mean([x >= vlambda[i]
                             for x in pvalues]) / (1.0 - vlambda[i])

    R.assign("pi0", pi0)
    R.assign("vlambda", vlambda)
    print "pi0=", pi0

    if smooth_log_pi0:
        # pi0 is an array here, so use numpy.log (math.log would fail)
        # and push the transformed values back to R before smoothing
        pi0 = numpy.log(pi0)
        R.assign("pi0", pi0)

    R.assign("smooth_df", smooth_df)
    spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
    pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")
    print spi0

    if smooth_log_pi0:
        pi0 = math.exp(pi0)

    return pi0
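# Illustrative call (not from the original source): estimate pi0 from a
# vector of p-values by smoothing the lambda -> pi0(lambda) curve; assumes
# `R` is rpy's r object, imported at module level as in the code above.
import numpy
numpy.random.seed(0)
pvals = list(numpy.random.uniform(0, 1, 1000))  # synthetic, all-null p-values
pi0 = smoothPValues(pvals)                      # should come out close to 1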
def etr_plot(curves, indir='.', subplot=False):
    '''
    Relative Electron Transport Rate plots.

    INPUT
    -----
    curves : list of curves
    indir : directory to save plots. Default is the current directory.
    subplot : Default False.

    OUTPUT
    ------
    Returns the per-curve fitted values; figures are saved as PNG files
    when subplot=True.
    '''
    light = []
    etr = []
    opts = []
    all_opts = []
    if not isinstance(curves, list):
        raise TypeError('variable "curves" must be a list')
    for ii, cur in enumerate(curves):
        light.append(np.float64(cur['PAR']))
        etr.append(np.float64(cur['ETR1']) / 1000.)
        etr.append(np.float64(cur['ETR2']) / 1000.)
        etr.append(np.float64(cur['ETR3']) / 1000.)
        etr.append(np.float64(cur['ETR4']) / 1000.)
        for value in etr:
            r.assign("x", light[0])
            r.assign("y", value)
            # sum-of-squares objective for the Platt model
            min_platt = r("""
            platt <- function(params){
                alpha <- params[1]
                Beta  <- params[2]
                Ps    <- params[3]
                return( sum( (y-Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps))^2 ) )
            }""")
            # Platt curve evaluated at the fitted parameters
            min_adp = r("""
            min_ad <- function(params){
                alpha <- params[1]
                Beta  <- params[2]
                Ps    <- params[3]
                return( Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps) )
            }""")
            r('etr_sim<-optim(par=c(0.4, 1.5, 80), fn=platt)')
            opts.append(r('min_ad(par = etr_sim$par)'))
        if subplot == True:
            # `colors` and `compr` are module-level lists defined elsewhere
            for i in xrange(len(opts)):
                plt.subplot(6, 5, ii + 1)
                # adjust spaces between subplots
                plt.subplots_adjust(wspace=0.2, hspace=0.7)
                plt.plot(light[0], etr[i], 'o', color=str(colors[i]))
                plt.ylim(0, 100)
                plt.xlabel('Light (PAR)')
                plt.ylabel(u'(rETR)')
                plt.plot(np.sort(light[0]), np.sort(opts[i]),
                         color=str(colors[i]), label=str(compr[i]))
                plt.legend(loc='upper left', prop={'size': 7})
                plt.title(str(cur['Date'][0] + ' ' + cur['Time'][0]))
        # keep this curve's fits before resetting the per-curve accumulators
        all_opts.append(opts)
        light = []
        etr = []
        opts = []
    if subplot == True:
        plt.savefig(os.path.join(
            indir, str(cur['Date'][0] + '_' + cur['Time'][0] + '.png')))
    plt.show()
    return all_opts
def main():

    parser = E.OptionParser(
        version="%prog version: $Id: rates2rates.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("--output-filename-pattern", dest="output_filename_pattern",
                      type="string",
                      help="pattern for additional output files [%default].")

    parser.add_option("--input-filename-neutral", dest="input_filename_neutral",
                      type="string",
                      help="a tab-separated file with rates and G+C content "
                      "in neutrally evolving regions [%default].")

    parser.set_defaults(
        input_filename_neutral=None,
        output_filename_pattern="%s",
        normalize=True,
        hardcopy=None,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if not options.input_filename_neutral:
        raise ValueError("please supply a file with neutral rates.")

    lines = options.stdin.readlines()
    if len(lines) == 0:
        raise IOError("no input")

    from rpy import r as R
    import rpy

    R.png(options.output_filename_pattern % "fit" + ".png",
          width=1024, height=768, type="cairo")

    matrix, headers = readRates(open(options.input_filename_neutral, "r"))
    R.assign("matrix", matrix)
    R.assign("headers", headers)
    nref = R("""length( matrix[,1] )""")

    dat = R("""dat <- data.frame(x = matrix[,2], y = matrix[,3])""")
    mod = R("""mod <- lm( y ~ x, dat)""")

    R("""plot( matrix[,2], matrix[,3], cex=%s, col="blue", pch="o", xlab="%s", ylab="%s" %s)""" %
      (0.3, headers[1], headers[2], ""))
    R("""new <- data.frame(x = seq( min(matrix[,2]), max(matrix[,2]), (max(matrix[,2]) - min(matrix[,2])) / 100))""")
    R("""predict(mod, new, se.fit = TRUE)""")
    R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
    R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
    R("""matpoints(new$x, cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
    R.mtext("y = %f * x + %f, r=%6.4f, n=%i" %
            (mod["coefficients"]["x"],
             mod["coefficients"]["(Intercept)"],
             R("""cor( dat )[2]"""),
             nref),
            3, cex=1.0)

    R("""mean_rate <- mean( matrix[,3] )""")

    data_matrix, data_headers = readRates(lines)
    R.assign("data_matrix", data_matrix)
    R.assign("data_headers", data_headers)
    ndata = R("""length( data_matrix[,1] )""")

    R("""points( data_matrix[,2], data_matrix[,3], cex=%s, col="red", pch="o" %s)""" %
      (0.3, ""))

    R("""topred <- data.frame( x = data_matrix[,2] )""")
    R("""corrected_rates <- predict( mod, topred, se.fit = TRUE )""")
    uncorrected = R("""uncorrected <- data_matrix[,3] / mean_rate """)
    corrected = R("""corrected <- as.vector(data_matrix[,3] / corrected_rates$fit)""")
    R.dev_off()

    R.png(options.output_filename_pattern % "correction" + ".png",
          width=1024, height=768, type="cairo")
    R("""plot( uncorrected, corrected, cex=%s, col="blue", pch="o", xlab="uncorrected rate", ylab="corrected rate" %s)""" %
      (0.3, ""))
    R.dev_off()

    E.Stop()
def doFDR(pvalues,
          vlambda=numpy.arange(0, 0.95, 0.05),
          pi0_method="smoother",
          fdr_level=None,
          robust=False,
          smooth_df=3,
          smooth_log_pi0=False):
    """Modeled after code taken from
    http://genomics.princeton.edu/storeylab/qvalue/linux.html.

    I did not like the error handling, so I translated most of it to
    python. Computes the FDR using the method of Storey et al. (2002).
    """
    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")
    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            "if length of vlambda is greater than 1, you need at least 4 values.")
    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)

    # the following blocks are the various ways to estimate pi0
    if len(vlambda) == 1:
        vlambda = vlambda[0]
        if vlambda < 0 or vlambda >= 1:
            raise ValueError("vlambda must be within [0, 1).")
        pi0 = numpy.mean([x >= vlambda for x in pvalues]) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)
    else:
        pi0 = numpy.zeros(len(vlambda), numpy.float)
        for i in range(len(vlambda)):
            pi0[i] = numpy.mean([x >= vlambda[i]
                                 for x in pvalues]) / (1.0 - vlambda[i])
        R.assign("pi0", pi0)
        R.assign("vlambda", vlambda)

        if pi0_method == "smoother":
            if smooth_log_pi0:
                # pi0 is an array here, so use numpy.log (math.log would
                # fail) and push the transformed values back to R
                pi0 = numpy.log(pi0)
                R.assign("pi0", pi0)
            R.assign("smooth_df", smooth_df)
            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")
            if smooth_log_pi0:
                pi0 = math.exp(pi0)
        elif pi0_method == "bootstrap":
            minpi0 = min(pi0)
            mse = numpy.zeros(len(vlambda), numpy.float)
            pi0_boot = numpy.zeros(len(vlambda), numpy.float)
            R.assign("pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError(
                "'pi0_method' must be one of 'smoother' or 'bootstrap'.")
        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)

    if pi0 <= 0:
        raise ValueError(
            "The estimated pi0 <= 0. Check that you have valid p-values "
            "or use another vlambda method.")
    if fdr_level is not None and (fdr_level <= 0 or fdr_level > 1):
        raise ValueError("'fdr_level' must be within (0, 1].")

    # the estimated q-values are calculated here
    # u = numpy.argsort( p )
    # change by Alan: ranking function which returns the number of
    # observations less than or equal
    R.assign("pvalues", pvalues)
    R.assign("robust", robust)
    qvalues = R("""u <- order(pvalues)
    qvalues.rank <- function(x) {
        idx <- sort.list(x)
        fc <- factor(x)
        nl <- length(levels(fc))
        bin <- as.integer(fc)
        tbl <- tabulate(bin)
        cs <- cumsum(tbl)
        tbl <- rep(cs, tbl)
        tbl[idx] <- tbl
        return(tbl)
    }
    v <- qvalues.rank(pvalues)
    m <- length(pvalues)

    qvalues <- pi0 * m * pvalues / v
    if(robust) {
        qvalues <- pi0*m*pvalues/(v*(1-(1-pvalues)^m))
    }
    qvalues[u[m]] <- min(qvalues[u[m]],1)
    for(i in (m-1):1) {
        qvalues[u[i]] <- min(qvalues[u[i]],qvalues[u[i+1]],1)
    }
    qvalues
    """)

    result = FDRResult()
    result.mQValues = qvalues

    if fdr_level is not None:
        result.mPassed = [x <= fdr_level for x in result.mQValues]
    else:
        result.mPassed = [False for x in result.mQValues]

    result.mPValues = pvalues
    result.mPi0 = pi0
    result.mLambda = vlambda

    return result
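# Sketch of a typical doFDR call (not from the original source): compute
# Storey q-values and flag tests passing a 5% FDR, using synthetic
# p-values; the FDRResult fields are those set in the function above.
import numpy
numpy.random.seed(1)
pvals = list(numpy.concatenate([
    numpy.random.uniform(0, 0.001, 50),   # 50 "signal" tests
    numpy.random.uniform(0, 1, 950)]))    # 950 "null" tests
result = doFDR(pvals, fdr_level=0.05)
n_passed = sum(result.mPassed)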
def getPi0(pvalues,
           vlambda=numpy.arange(0, 0.95, 0.05),
           pi0_method="smoother",
           smooth_df=3,
           smooth_log_pi0=False):
    '''used within nubiscan.'''
    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")
    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            "if length of vlambda is greater than 1, you need at least 4 values.")
    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)

    # the following blocks are the various ways to estimate pi0
    if len(vlambda) == 1:
        vlambda = vlambda[0]
        if vlambda < 0 or vlambda >= 1:
            raise ValueError("vlambda must be within [0, 1).")
        pi0 = numpy.mean([x >= vlambda for x in pvalues]) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)
    else:
        pi0 = numpy.zeros(len(vlambda), numpy.float)
        for i in range(len(vlambda)):
            pi0[i] = numpy.mean([x >= vlambda[i]
                                 for x in pvalues]) / (1.0 - vlambda[i])
        R.assign("pi0", pi0)
        R.assign("vlambda", vlambda)

        if pi0_method == "smoother":
            if smooth_log_pi0:
                # pi0 is an array here, so use numpy.log (math.log would
                # fail) and push the transformed values back to R
                pi0 = numpy.log(pi0)
                R.assign("pi0", pi0)
            R.assign("smooth_df", smooth_df)
            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")
            if smooth_log_pi0:
                pi0 = math.exp(pi0)
        elif pi0_method == "bootstrap":
            minpi0 = min(pi0)
            mse = numpy.zeros(len(vlambda), numpy.float)
            pi0_boot = numpy.zeros(len(vlambda), numpy.float)
            R.assign("pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError(
                "'pi0_method' must be one of 'smoother' or 'bootstrap'.")
        pi0 = min(pi0, 1.0)

    if pi0 <= 0:
        raise ValueError(
            "The estimated pi0 <= 0. Check that you have valid p-values "
            "or use another vlambda method.")

    return pi0
def generateCountsGraph2(self, counts, sitename, widthpx=648, resol=72):
    '''
    Static function to generate a graph file via R. Graphs *all* of the
    counts records contained in the counts list.

    This one uses more in-R processing to handle dates/times (since Rpy
    doesn't do automatic conversions).
    '''
    log = logging.getLogger()
    log.info('Generating graph for %d counts from site %s' %
             (len(counts), sitename))
    from rpy import r as robj

    # calculate graph image information
    ratio = float(self.config.get('data', 'graphratio'))
    widthpx = int(widthpx)
    imgwidth = int(float(widthpx) / float(resol))
    imgheight = int((float(widthpx) * ratio) / float(resol))
    resol = int(resol)

    # get an unused file/name to put image data into...
    (fd, tmpgraphfile) = mkstemp()
    log.debug("Temp graph filename = %s" % tmpgraphfile)

    # unpack CountsRecords into counts and timestamps, e.g.
    #   cr.datetime = "2008-02-11 12:07:08.112117"
    #   cr.c1 = 5440
    cts = []
    ctm = []
    for cr in counts:
        cts.append(cr.c1)
        ctm.append(str(cr.datetime))
    log.debug("Got list of %d counts." % len(cts))

    # if there is data for a graph, import it into R
    if len(cts) > 0:
        robj.assign('rcts', cts)
        robj.assign('rctm', ctm)
        # convert timestamps to POSIXct objects within R:
        #   datpt <- as.POSIXct(strptime(dat, format="%Y-%m-%d %H:%M:%S"))
        robj('''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))''')
        cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
            tmpgraphfile, imgwidth, imgheight, resol)
        log.debug("R cmdstring is %s" % cmdstring)
        robj(cmdstring)
        log.debug("Completed R command string %s" % cmdstring)

        ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
        ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
        # xlabel = " ctm[%s] -- ctm[%s] " % ("0", str(len(ctm) - 1))
        xlabel = " %s -- %s " % (ctm[0], ctm[len(ctm) - 1])
        cmdstring = ('plot( rctmpct, rcts, col="black", main="Counts: %s", '
                     'xlab="Dates: %s", ylab="Counts/min", type="l", '
                     'ylim=c(%d,%d) )' % (sitename, xlabel, ymin, ymax))
        log.debug("R cmdstring is %s" % cmdstring)
        robj(cmdstring)
        log.debug("Completed R command string %s" % cmdstring)
        robj.dev_off()

        # pull the written image and return it to the caller
        import imghdr
        imgtype = imghdr.what(tmpgraphfile)
        log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
        f = open(tmpgraphfile)
    else:
        log.debug("No data. Generating proper error image...")
        f = open(self.config.get('data', 'nodatapng'))
    return f
def platt(light, etr, ini=None):
    """
    Adjust a curve of best fit, following the Platt model.

    Parameters
    ----------
    light : arr
        Generally PAR values. Photosynthetically Active Radiation
        interferes with primary production; 'light' is this parameter.
    etr : arr
        Electron Transfer Rate, by means of relative ETR, obtained from
        Rapid Light Curves.
    ini : list
        Optional initial values for the optimization process.

    Returns
    -------
    iniR : arr
        Initial values modeled, with R's `optim` function.
    opts : arr
        Curve adjusted with the modeled ETR values.
    pars : arr
        Curve parameters (alpha, Ek, ETRmax).

    See Also
    --------
    T. Platt, C.L. Gallegos and W.G. Harrison, 1980. Photoinhibition of
    photosynthesis in natural assemblages of marine phytoplankton.
    Journal of Marine Research, 38:4, 687-701.
    """
    opts = []
    pars = []
    r.assign("x", light[~np.isnan(light)])
    r.assign("y", etr[~np.isnan(etr)])
    if ini is None:
        r.assign('ini', [0.4, 1.5, 1500])
    else:
        r.assign('ini', np.array(ini))
    # sum-of-squares objective for the Platt model
    min_platt = r("""
    platt <- function(params){
        alpha <- params[1]
        Beta  <- params[2]
        Ps    <- params[3]
        return( sum( (y-Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps))^2 ) )
    }""")
    # Platt curve evaluated at a given parameter vector
    min_adp = r("""
    min_ad <- function(params){
        alpha <- params[1]
        Beta  <- params[2]
        Ps    <- params[3]
        return( Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps) )
    }""")
    r('etr_sim<-optim(par=ini, fn=platt)')
    r('p_alpha<-etr_sim$par[1]')
    r('p_Beta<-etr_sim$par[2]')
    r('p_Ps2<-etr_sim$par[3]')
    # derive ETRmax and Ek from the fitted parameters
    r('''
    if (p_Beta==0 | p_Beta<0){
        p_etrmax<-p_Ps2
    } else {
        p_etrmax<-p_Ps2*(p_alpha/(p_alpha+p_Beta))*
            (p_Beta/(p_alpha+p_Beta))^(p_Beta/p_alpha)
    }
    p_Ek<-p_etrmax/p_alpha
    ''')
    iniR = r('etr_sim$par')
    opts = np.append(opts, r('min_ad(par = etr_sim$par)'))
    cpars = r('as.data.frame(cbind(p_alpha, p_Ek, p_etrmax))')
    pars = [cpars['p_alpha'], cpars['p_Ek'], cpars['p_etrmax']]
    return iniR, opts, pars
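# Illustrative fit (not from the original source): generate a synthetic
# rapid light curve from plausible Platt parameters and recover them.
import numpy as np

light = np.array([0., 50., 100., 200., 400., 800., 1200.])
# synthetic rETR from alpha=0.4, Beta=0.05, Ps=80 (illustrative values)
etr = 80. * (1 - np.exp(-0.4 * light / 80.)) * np.exp(-0.05 * light / 80.)

iniR, opts, (alpha, Ek, etrmax) = platt(light, etr)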
def eilers_peeters(light, etr, ini=None):
    """
    Adjust a best-fit curve to ExP curves, according to the
    Eilers & Peeters model.

    Parameters
    ----------
    light : arr
        Generally PAR values. Photosynthetically Active Radiation
        interferes with primary production; 'light' is this parameter.
    etr : arr
        Electron Transfer Rate, by means of relative ETR, obtained from
        Rapid Light Curves.
    ini : None
        Initial values to seed the curve. To insert initial values, they
        must be a list of values for the initial parameters (a, b, c) of
        the Eilers-Peeters model.

    Returns
    -------
    iniR : arr
        Initial values found by the `nls2` brute-force search, or `ini`
        if given.
    opts : arr
        Values optimized.
    params : arr
        Curve parameters (alpha, Ek, ETR_max).

    See Also
    --------
    P.H.C. Eilers and J.C.H. Peeters. 1988. A model for the relationship
    between the light intensity and the rate of photosynthesis in
    phytoplankton. Ecol. Model. 42:199-215.

    #TODO
    ## Implement minimisation in Python.
    ## It's not very clear how to apply `nls2` in Python.
    ## minimize from a list of initial values.
    ##a = varis[0]
    ##b = varis[1]
    ##c = varis[2]
    #a = mini['a']
    #b = mini['b']
    #c = mini['c']
    #opts = (light/(a*(light**2)+(b*light)+c))
    #ad = fmin(ep_minimize,varis,args=(light,etr))
    #alpha = (1./ad[2])
    #etrmax = 1./(ad[1]+2*(ad[0]*ad[2])**0.5)
    #Eopt = (ad[2]/ad[0])**0.5
    #Ek = etrmax/alpha
    #params = [alpha, Ek, etrmax, Eopt]
    """
    r('library(nls2)')
    r.assign("x", light[~np.isnan(light)])
    r.assign("y", etr[~np.isnan(etr)])
    r('dat<-as.data.frame(cbind(x,y))')
    r('names(dat)<-c("light","etr")')

    if ini is None:
        # brute-force search over a coarse parameter grid for starting values
        r('''grid<-expand.grid(list(a=seq(1e-07,9e-06,by=2e-07),
            b=seq(-0.002,0.006,by=0.002), c=seq(-6,6,by=2)))''')
        mini = r('''
        mini<-coefficients(nls2(etr~light/(a*light^2+b*light+c),
            data=dat, start=grid, algorithm="brute-force"))
        ''')
    else:
        mini = ini

    r.assign("mini", mini)
    r('''ep<-nls(etr~light/(a*light^2+b*light+c), data=dat,
        start=list(a=mini[1],b=mini[2],c=mini[3]),
        lower = list(0,-Inf,-Inf), trace=FALSE, algorithm = "port",
        nls.control("maxiter"=100000, tol=0.15))
    a2<-summary(ep)$coefficients[1]
    b2<-summary(ep)$coefficients[2]
    c2<-summary(ep)$coefficients[3]
    alpha<-(1/c2)
    etrmax<-1/(b2+2*(a2*c2)^0.5)
    Eopt<-(c2/a2)^0.5
    Ek<-etrmax/alpha''')

    iniR = mini
    alpha = r('alpha')
    Ek = r('Ek')
    etr_max = r('etrmax')
    params = [alpha, Ek, etr_max]
    opts = r('opts<-fitted(ep)')
    return iniR, opts, params
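# Illustrative fit (not from the original source): synthesize an ExP curve
# from hypothetical Eilers-Peeters parameters (a, b, c) and refit it.
# Requires the R package `nls2` to be installed.
import numpy as np

light = np.array([0., 50., 100., 200., 400., 800., 1200.])
a, b, c = 5e-6, 0.002, 2.0   # hypothetical "true" parameters
etr = light / (a * light**2 + b * light + c)

iniR, opts, (alpha, Ek, etr_max) = eilers_peeters(light, etr)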