Exemple #1
0
def anova2(values,
           factor1,
           factor2,
           factor1name="factor1",
           factor2name="factor2",
           interaction=True):
    """ python wrapper for a two-way anova in R with optional interaction term ( default=True ) """
    # build a dataframe for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    dataframe["factor2"] = factor2
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    r("df$factor2 <- factor( df$factor2 )")
    # run the model
    results = r("anova( lm( df$feature ~ df$factor1 %s df$factor2 ) )" %
                ("*" if interaction else "+"))
    r("rm( list=ls() )")
    # convert R results to table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, factor2name]
    rowheads += ["int term", "error"] if interaction else ["error"]
    ndictData = {}
    for rowhead in results.keys():
        for index, name in zip(range(len(rowheads)), rowheads):
            dictName = ndictData.setdefault(name, {})
            dictName[rowhead] = results[rowhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
Exemple #2
0
 def r_to_str(robj):
     "Returns an R object in a representation as a list of strings."
     from rpy import r
     from tempfile import mktemp
     tmpfile = mktemp()
     #logging.info('Tmpfile: %s' % tmpfile)
     try:
         r.assign('tmpobj', robj)
         r('save(tmpobj, file="%s", ascii=TRUE)' % tmpfile)
         return open(tmpfile).read()
     finally:
         if os.access(tmpfile, os.R_OK):
             os.remove(tmpfile)
Exemple #3
0
 def r_to_str(robj):
     "Returns an R object in a representation as a list of strings."
     from rpy import r
     from tempfile import mktemp
     tmpfile = mktemp()
     #logging.info('Tmpfile: %s' % tmpfile)
     try:
         r.assign('tmpobj', robj)
         r('save(tmpobj, file="%s", ascii=TRUE)' % tmpfile)
         return open(tmpfile).read()
     finally:
         if os.access(tmpfile, os.R_OK):
             os.remove(tmpfile)
Exemple #4
0
def platt_opts(light, params):
    """
    Adjust `opt` values of PAR levels following the Platt model.

    Parameters
    ----------
    light : arr
        Generally PAR values. Where Photosynthetic Active Radiance
        interfer on Primary Production. 'light' is this parameter.
    params: arr
        Containing values of (alpha, Beta, etrmax).

    Returns
    -------
    opts : arr
        Values optimized according to `params`and list of PAR levels.
    """
    opts = []

    r.assign("light", light[~np.isnan(light)])
    r.assign("params", params)
    # if opt == None:
    #     r.assign("opt", light[~np.isnan(light)])
    # else:
    #     r.assign("opt", opt[~np.isnan(opt)])

    # if ini == None:
    #     r.assign('ini', [0.4,1.5,1500])

    # else:
    #     r.assign('ini', np.array(ini))

    # op, platt_param = platt(light,etr, ini=ini)
    # r.assign('platt_param', platt_param)

    min_opt = r("""
    min_opt<-function(light,params){
        alpha<-params[1]
        Beta<-params[2]
        Ps<-params[3]
        return( ( (Ps*(1-exp(-alpha*light/Ps)) *exp(-Beta*light/Ps)) ) )
    }""")

    opts = np.append(opts, r('min_opt(light, params)'))

    return opts
Exemple #5
0
def anova(values, factor1, factor1name="factor1"):
    """ python wrapper for a one-way ANOVA in R """
    # build a dataframe for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    # run the model
    results = r("anova( lm( df$feature ~ df$factor1 ) )")
    r("rm( list=ls() )")
    # convert R results to table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, "error"]
    ndictData = {}
    for rowhead in results.keys():
        for index, name in zip(range(len(rowheads)), rowheads):
            dictName = ndictData.setdefault(name, {})
            dictName[rowhead] = results[rowhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
Exemple #6
0
    def plot(self, hardcopy = None):

        if hardcopy:
            R.png(hardcopy, width=1024, height=768, type="cairo")

        R.require('qvalue')

        # build a qobj
        R.assign( "pval", self.mPValues )
        R.assign( "pi0", self.mPi0 )
        R.assign( "qval", self.mQValues )
        R.assign( "lambda", self.mLambda )
        R("""qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)""")
        R(""" class(qobj) <- "qvalue" """)

        R("""qplot(qobj)""")

        if hardcopy:
            R.dev_off()
Exemple #7
0
    def plot(self, hardcopy=None):

        if hardcopy:
            R.png(hardcopy, width=1024, height=768, type="cairo")

        R.require('qvalue')

        # build a qobj
        R.assign("pval", self.mPValues)
        R.assign("pi0", self.mPi0)
        R.assign("qval", self.mQValues)
        R.assign("lambda", self.mLambda)
        R("""qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)"""
          )
        R(""" class(qobj) <- "qvalue" """)

        R("""qplot(qobj)""")

        if hardcopy:
            R.dev_off()
Exemple #8
0
def smooth_data(data):
    sample_data=data[0]
    window_size=data[1]
    for rep_num in range(sample_data.get_number_of_replicates()):
        for chrom in sample_data.get_chromosome_list():
            met_manager = sample_data.get_manager_of_chrom(chrom)
            pos=[]
            m=[]
            cov=[]
            for methyl_c in met_manager:
                pos.append(methyl_c.position)
                m.append(methyl_c.get_methylrate(rep_num))
                cov.append(methyl_c.get_coverage(rep_num))
            r.warnings()
            r.library("locfit")
            r.assign("pos",pos)
            r.assign("m",m)
            r.assign("cov",cov)
            r.assign("h",window_size)
            r("posm=data.frame(pos,m)")
            r("fit=locfit(m~lp(pos,h=h),data=posm,maxk=1000000,weights=cov)")
            r("pp=preplot(fit,where='data',band='local',newdata=data.frame(pos=pos))")
            fit=r("pp")["fit"]
            list=r("unlist(pp$xev$xev)")
            for i, each in enumerate(list):
                position=int(each[0])
                methyl_c=met_manager.get_methyl_c(position)
                if methyl_c:
                    smoothedrate=None
                    if 1 <= fit[i]:
                        smoothedrate=1
                    elif fit[i] <= 0:
                        smoothedrate=0
                    else:
                        smoothedrate=fit[i]
                    methyl_c.update_methylrate(rep_num,smoothedrate)
                else:
                    sys.stderr.write("methyl_c doesn't exist at %d",position)
                    sys.exit(1)
Exemple #9
0
def smoothPValues(pvalues,
                  vlambda=numpy.arange(0, 0.95, 0.05),
                  smooth_df=3,
                  smooth_log_pi0=False):

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            " If length of vlambda greater than 1, you need at least 4 values."
        )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)

    pi0 = numpy.zeros(len(vlambda), numpy.float)

    for i in range(len(vlambda)):
        pi0[i] = numpy.mean([x >= vlambda[i]
                             for x in pvalues]) / (1.0 - vlambda[i])

    R.assign("pi0", pi0)
    R.assign("vlambda", vlambda)
    print "pi0=", pi0

    if smooth_log_pi0:
        pi0 = math.log(pi0)

    R.assign("smooth_df", smooth_df)

    spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
    pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")

    print spi0
    if smooth_log_pi0:
        pi0 = math.exp(pi0)

    return pi0
Exemple #10
0
def smoothPValues( pvalues,
                   vlambda=numpy.arange(0,0.95,0.05),
                   smooth_df = 3,
                   smooth_log_pi0 = False):

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError( "p-values out of range" )

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(" If length of vlambda greater than 1, you need at least 4 values." )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError( "vlambda must be within [0, 1).")

    m = len(pvalues)

    pi0 = numpy.zeros( len(vlambda), numpy.float )

    for i in range( len(vlambda) ):
        pi0[i] = numpy.mean( [x >= vlambda[i] for x in pvalues ]) / (1.0 -vlambda[i] )

    R.assign( "pi0", pi0)
    R.assign( "vlambda", vlambda)
    print "pi0=", pi0
    
    if smooth_log_pi0:
        pi0 = math.log(pi0)

    R.assign( "smooth_df", smooth_df)

    spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
    pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")

    print spi0
    if smooth_log_pi0:
        pi0 = math.exp(pi0)

    return pi0
Exemple #11
0
def etr_plot(curves, indir='.', subplot=False):
    '''
    Relative Electron Transference Rate plots.

    INPUT
    -----
    curves : list of curves
    indir : directory to save plots. Default current directory
    subplot : Default False.

    OUTPUT
    ------
    Return =     
    Figure saved (png)
    '''
    light = []
    etr = []
    opts = []
    x = []
    y = []

    if type(curves) != list:
        raise TypeError('variable "curves", must be a list')

    for ii, cur in enumerate(curves):
        light.append(np.float64(cur['PAR']))
        etr.append(np.float64(cur['ETR1']) / 1000.)
        etr.append(np.float64(cur['ETR2']) / 1000.)
        etr.append(np.float64(cur['ETR3']) / 1000.)
        etr.append(np.float64(cur['ETR4']) / 1000.)
        for value in etr:
            r.assign("x", light[0])
            r.assign("y", value)
            min_platt = r("""
            platt<- function(params){
                alpha<-params[1]
                Beta<- params[2]
                Ps<- params[3]
                return( sum( (y-Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps))^2))
            } """)
            min_adp = r("""
            min_ad<-function(params){ 
                alpha<-params[1]
                Beta<-params[2]
                Ps<-params[3]
                return( ( (Ps*(1-exp(-alpha*x/Ps)) *exp(-Beta*x/Ps)) ))
            }""")
            r('etr_sim<-optim(par=c(0.4, 1.5 , 80),fn=platt)')
            opts.append(r('min_ad(par = etr_sim$par)'))

        if subplot == True:
            #plt.subplot(6,5,cur+1)
            for i in xrange(len(opts)):
                #plt.figure(figsize=(8, 6), dpi=80)
                plt.subplot(6, 5, ii + 1)
                plt.subplots_adjust(
                    wspace=0.2, hspace=0.7)  #adjust spaces between subplots
                plt.plot(light[0], etr[i], 'o', color=str(colors[i]))
                plt.ylim(0, 100)
                plt.xlabel('Light (PAR)')
                plt.ylabel(u'(rETR)')
                plt.plot(np.sort(light[0]),
                         np.sort(opts[i]),
                         color=str(colors[i]),
                         label=str(compr[i]))
                plt.legend(
                    loc='upper left', prop={'size': 7}
                )  #markerscale=0.1, borderpad=0.1, labelspacing=0.1, mpl.font_manager.FontManager(size=10))#,ncol=4,prop=font_manager.FontProperties(size=10))
                plt.title(str(cur['Date'][0] + ' ' + cur['Time'][0]))
            light = []
            etr = []
            e_sim = []
            opts = []
            x = []
            y = []
    if subplot == True:
        plt.savefig(
            os.path.join(indir +
                         str(cur['Date'][0] + '_' + cur['Time'][0] + '.png')))

    plt.show()

    return opts
Exemple #12
0
def main():

    parser = E.OptionParser(
        version="%prog version: $Id: rates2rates.py 2781 2009-09-10 11:33:14Z andreas $", usage=globals()["__doc__"])

    parser.add_option("--output-filename-pattern", dest="output_filename_pattern", type="string",
                      help="pattern for additional output files [%default].")

    parser.add_option("--input-filename-neutral", dest="input_filename_neutral", type="string",
                      help="a tab-separated file with rates and G+C content in neutrally evolving regions [%default].")

    parser.set_defaults(
        input_filename_neutral=None,
        output_filename_pattern="%s",
        normalize=True,
        hardcopy=None,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if not options.input_filename_neutral:
        raise ValueError("please supply a file with neutral rates.")

    lines = options.stdin.readlines()
    if len(lines) == 0:
        raise IOError("no input")

    from rpy import r as R
    import rpy

    R.png(options.output_filename_pattern %
          "fit" + ".png", width=1024, height=768, type="cairo")
    matrix, headers = readRates(open(options.input_filename_neutral, "r"))
    R.assign("matrix", matrix)
    R.assign("headers", headers)
    nref = R( """length( matrix[,1] )""" )

    dat = R("""dat <- data.frame(x = matrix[,2], y = matrix[,3])""")
    mod = R("""mod <- lm( y ~ x, dat)""")

    R("""plot( matrix[,2], matrix[,3], cex=%s, col="blue", pch="o", xlab="%s", ylab="%s" %s)""" % (
        0.3, headers[1], headers[2], ""))
    R(
        """new <- data.frame(x = seq( min(matrix[,2]), max(matrix[,2]), (max(matrix[,2]) - min(matrix[,2])) / 100))""")
    R("""predict(mod, new, se.fit = TRUE)""")
    R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
    R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
    R(
        """matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
    R.mtext(
        "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"],
                                            mod["coefficients"]["(Intercept)"],
                                            R("""cor( dat )[2]"""),
                                            nref),
        3,
        cex=1.0)

    R("""mean_rate <- mean( matrix[,3] )""")

    data_matrix, data_headers = readRates(lines)
    R.assign("data_matrix", data_matrix)
    R.assign("data_headers", data_headers)
    ndata = R( """length( data_matrix[,1] )""" )

    R("""points( data_matrix[,2], data_matrix[,3], cex=%s, col="red", pch="o" %s)""" % (
        0.3, ""))
    R("""topred <- data.frame( x = data_matrix[,2] )""")
    R("""corrected_rates <- predict( mod, topred, se.fit = TRUE )""")
    uncorrected = R("""uncorrected <- data_matrix[,3] / mean_rate """)
    corrected = R(
        """corrected <- as.vector(data_matrix[,3] / corrected_rates$fit)""")
    R.dev_off()

    R.png(options.output_filename_pattern %
          "correction" + ".png", width=1024, height=768, type="cairo")
    R("""plot( uncorrected, corrected, cex=%s, col="blue", pch="o", xlab="uncorrected rate", ylab="corrected rate" %s)""" %
      (0.3, ""))
    R.dev_off()

    E.Stop()
Exemple #13
0
def doFDR(pvalues,
          vlambda=numpy.arange(0, 0.95, 0.05),
          pi0_method="smoother",
          fdr_level=None,
          robust=False,
          smooth_df=3,
          smooth_log_pi0=False):
    """modeled after code taken from http://genomics.princeton.edu/storeylab/qvalue/linux.html.

    I did not like the error handling so I translated most to python.
    
    Compute FDR after method by Storey et al. (2002).
    """

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            " If length of vlambda greater than 1, you need at least 4 values."
        )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)

    # these next few functions are the various ways to estimate pi0
    if len(vlambda) == 1:
        vlambda = vlambda[0]
        if vlambda < 0 or vlambda >= 1:
            raise ValueError("vlambda must be within [0, 1).")

        pi0 = numpy.mean([x >= vlambda for x in pvalues]) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)
    else:
        pi0 = numpy.zeros(len(vlambda), numpy.float)

        for i in range(len(vlambda)):
            pi0[i] = numpy.mean([x >= vlambda[i]
                                 for x in pvalues]) / (1.0 - vlambda[i])

        R.assign("pi0", pi0)
        R.assign("vlambda", vlambda)

        if pi0_method == "smoother":
            if smooth_log_pi0:
                pi0 = math.log(pi0)

            R.assign("smooth_df", smooth_df)

            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")
            if smooth_log_pi0:
                pi0 = math.exp(pi0)

        elif pi0_method == "bootstrap":

            minpi0 = min(pi0)

            mse = numpy.zeros(len(vlambda), numpy.float)
            pi0_boot = numpy.zeros(len(vlambda), numpy.float)

            R.assign("pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) 
            {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) 
                {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError(
                "'pi0_method' must be one of 'smoother' or 'bootstrap'.")

        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)

    if pi0 <= 0:
        raise ValueError(
            "The estimated pi0 <= 0. Check that you have valid p-values or use another vlambda method."
        )

    if fdr_level != None and (fdr_level <= 0 or fdr_level > 1):
        raise ValueError("'fdr_level' must be within (0, 1].")

    # The estimated q-values calculated here
    #u = numpy.argsort( p )

    # change by Alan
    # ranking function which returns number of observations less than or equal
    R.assign("pvalues", pvalues)
    R.assign("robust", robust)
    qvalues = R("""u <- order(pvalues)
    qvalues.rank <- function(x) 
{
      idx <- sort.list(x)

      fc <- factor(x)
      nl <- length(levels(fc))
      bin <- as.integer(fc)
      tbl <- tabulate(bin)
      cs <- cumsum(tbl)

      tbl <- rep(cs, tbl)
      tbl[idx] <- tbl

      return(tbl)
}

v <- qvalues.rank(pvalues)
m <- length(pvalues)
qvalues <- pi0 * m * pvalues / v

if(robust) 
{
        qvalues <- pi0*m*pvalues/(v*(1-(1-pvalues)^m))
}
qvalues[u[m]] <- min(qvalues[u[m]],1)

for(i in (m-1):1) 
{
   qvalues[u[i]] <- min(qvalues[u[i]],qvalues[u[i+1]],1)
}
qvalues
""")

    result = FDRResult()
    result.mQValues = qvalues

    if fdr_level != None:
        result.mPassed = [x <= fdr_level for x in result.mQValues]
    else:
        result.mPassed = [False for x in result.mQValues]

    result.mPValues = pvalues
    result.mPi0 = pi0
    result.mLambda = vlambda

    return result
Exemple #14
0
def getPi0(pvalues,
           vlambda=numpy.arange(0, 0.95, 0.05),
           pi0_method="smoother",
           smooth_df=3,
           smooth_log_pi0=False):
    '''used within nubiscan.'''

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError("p-values out of range")

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(
            " If length of vlambda greater than 1, you need at least 4 values."
        )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError("vlambda must be within [0, 1).")

    m = len(pvalues)

    # these next few functions are the various ways to estimate pi0
    if len(vlambda) == 1:
        vlambda = vlambda[0]
        if vlambda < 0 or vlambda >= 1:
            raise ValueError("vlambda must be within [0, 1).")

        pi0 = numpy.mean([x >= vlambda for x in pvalues]) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign("pi0", pi0)

    else:

        pi0 = numpy.zeros(len(vlambda), numpy.float)

        for i in range(len(vlambda)):
            pi0[i] = numpy.mean([x >= vlambda[i]
                                 for x in pvalues]) / (1.0 - vlambda[i])

        R.assign("pi0", pi0)
        R.assign("vlambda", vlambda)

        if pi0_method == "smoother":
            if smooth_log_pi0:
                pi0 = math.log(pi0)

            R.assign("smooth_df", smooth_df)

            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")

            if smooth_log_pi0:
                pi0 = math.exp(pi0)

        elif pi0_method == "bootstrap":

            minpi0 = min(pi0)

            mse = numpy.zeros(len(vlambda), numpy.float)
            pi0_boot = numpy.zeros(len(vlambda), numpy.float)

            R.assign("pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) 
            {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) 
                {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError(
                "'pi0_method' must be one of 'smoother' or 'bootstrap'.")

        pi0 = min(pi0, 1.0)

    if pi0 <= 0:
        raise ValueError(
            "The estimated pi0 <= 0. Check that you have valid p-values or use another vlambda method."
        )

    return pi0
Exemple #15
0
def main():

    parser = E.OptionParser( version = "%prog version: $Id: rates2rates.py 2781 2009-09-10 11:33:14Z andreas $", usage = globals()["__doc__"])

    parser.add_option( "--output-filename-pattern", dest="output_filename_pattern", type="string",
                      help="pattern for additional output files [%default]."  )

    parser.add_option( "--input-filename-neutral", dest="input_filename_neutral", type="string",
                      help="a tab-separated file with rates and G+C content in neutrally evolving regions [%default]."  )

    parser.set_defaults(
        input_filename_neutral = None,
        output_filename_pattern = "%s",
        normalize = True,
        hardcopy = None,
        )

    (options, args) = E.Start( parser, add_csv_options = True )

    if not options.input_filename_neutral:
        raise ValueError( "please supply a file with neutral rates." )

    lines = options.stdin.readlines()
    if len(lines) == 0:
        raise IOError ( "no input" )

    from rpy import r as R
    import rpy

    R.png( options.output_filename_pattern % "fit" + ".png", width=1024, height=768, type="cairo")
    matrix, headers = readRates( open( options.input_filename_neutral, "r" ) )
    R.assign("matrix", matrix)
    R.assign("headers", headers)
    nref = R( """length( matrix[,1] )""" )

    dat = R("""dat <- data.frame(x = matrix[,2], y = matrix[,3])""")
    mod = R("""mod <- lm( y ~ x, dat)""")

    R("""plot( matrix[,2], matrix[,3], cex=%s, col="blue", pch="o", xlab="%s", ylab="%s" %s)""" % (0.3, headers[1], headers[2], "") )
    R("""new <- data.frame(x = seq( min(matrix[,2]), max(matrix[,2]), (max(matrix[,2]) - min(matrix[,2])) / 100))""")
    R("""predict(mod, new, se.fit = TRUE)""")
    R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
    R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
    R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
    R.mtext(
        "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"], 
                                            mod["coefficients"]["(Intercept)"], 
                                            R("""cor( dat )[2]"""), 
                                            nref ),
        3,
        cex = 1.0)

    R("""mean_rate <- mean( matrix[,3] )""")

    data_matrix, data_headers = readRates( lines )
    R.assign("data_matrix", data_matrix)
    R.assign("data_headers", data_headers)
    ndata = R( """length( data_matrix[,1] )""" )
    
    R("""points( data_matrix[,2], data_matrix[,3], cex=%s, col="red", pch="o" %s)""" % (0.3, "") )
    R("""topred <- data.frame( x = data_matrix[,2] )""")
    R("""corrected_rates <- predict( mod, topred, se.fit = TRUE )""")
    uncorrected = R("""uncorrected <- data_matrix[,3] / mean_rate """) 
    corrected = R("""corrected <- as.vector(data_matrix[,3] / corrected_rates$fit)""")
    R.dev_off()
    
    R.png( options.output_filename_pattern % "correction" + ".png", width=1024, height=768, type="cairo")
    R("""plot( uncorrected, corrected, cex=%s, col="blue", pch="o", xlab="uncorrected rate", ylab="corrected rate" %s)""" % (0.3, "") )
    R.dev_off()

    E.Stop()
Exemple #16
0
    def generateCountsGraph2(
        self,
        counts,
        sitename,
        widthpx=648,
        resol=72,
    ):
        '''
                Static function to generate graph file via R.
                Graphs *all* of the counts records contained in counts List
                This one uses more in-R processing to handle dates/times (since
                Rpy doesn't do automatic conversions). 
            '''
        log = logging.getLogger()
        log.info('Generating graph for %d counts from site %s' %
                 (len(counts), sitename))

        from rpy import r as robj

        # Calculate graph image information
        ratio = float(self.config.get('data', 'graphratio'))
        widthpx = int(widthpx)
        imgwidth = int(float(widthpx) / float(resol))
        imgheight = int(((float(widthpx) * ratio) / float(resol)))
        resol = int(resol)

        # Get unused file/name to put image data into...
        (fd, tmpgraphfile) = mkstemp()
        log.debug("Temp graph filename = %s" % tmpgraphfile)

        # Unpack CountsRecords into counts and timestamps.
        cts = []
        ctm = []
        for cr in counts:
            # cr.datetime = "2008-02-11 12:07:08.112117"
            # cr.c1 = 5440
            cts.append(cr.c1)
            ctm.append(str(cr.datetime))

        log.debug("Got list of %d counts." % len(cts))

        # If there is data for a graph, import into R.
        if len(cts) > 0:
            robj.assign('rcts', cts)
            robj.assign('rctm', ctm)

            # Convert timestamps to POSIXct objects within R.
            # datpt <- as.POSIXct(strptime(dat,format="%Y-%m-%d %H:%M:%S"))
            robj(
                '''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))'''
            )
            cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
                tmpgraphfile, imgwidth, imgheight, resol)
            log.debug("R cmdstring is %s" % cmdstring)
            robj(cmdstring)
            log.debug("Completed R command string %s" % cmdstring)

            ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
            ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
            #xlabel = " ctm[%s] -- ctm[%s] " % ("0",str( len(ctm)-1))
            xlabel = " %s -- %s " % (ctm[0], ctm[len(ctm) - 1])
            cmdstring = 'plot( rctmpct, rcts, col="black",main="Counts: %s", xlab="Dates:  %s",ylab="Counts/min",type="l", ylim=c(%d,%d) )' % (
                sitename, xlabel, ymin, ymax)
            log.debug("R cmdstring is %s" % cmdstring)
            robj(cmdstring)
            log.debug("Completed R command string %s" % cmdstring)
            robj.dev_off()

            # Pull written image and return to caller
            import imghdr
            imgtype = imghdr.what(tmpgraphfile)
            log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
            f = open(tmpgraphfile)
        else:
            log.debug("No data. Generating proper error image...")
            f = open(self.config.get('data', 'nodatapng'))
        return f
Exemple #17
0
def platt(light, etr, ini=None):
    """
    Adjust a curve of best fit, following the Platt model.

    Parameters
    ----------
    light : arr
        Generally PAR values. Where Photosynthetic Active Radiance
        interfer on Primary Production. 'light' is this parameter.

    etr : arr
        Eletron Transference Rate, by means relative ETR, obtained from
        Rapid Light Curves.

    ini : List
        optional intial values for optimization proccess.
    Returns
    -------
    iniR : arr
        Initial values modeled, with R `optim` function.
    opts : arr
        Curve adjusted with ETR values modeled.

    pars : arr
        Curve parameters (alpha, Ek, ETRmax)

    See Also
    --------
    T. Platt, C.L. Gallegos and W.G. Harrison, 1980. Photoinibition of
        photosynthesis in natural assemblages of marine phytoplankton. Journal
        of Marine Research, 38:4, 687-701.

    """
    opts = []
    pars = []

    r.assign("x", light[~np.isnan(light)])
    r.assign("y", etr[~np.isnan(etr)])

    if ini is None:
        r.assign('ini', [0.4, 1.5, 1500])

    else:
        r.assign('ini', np.array(ini))

    min_platt = r("""
    platt<- function(params){
        alpha<-params[1]
        Beta<- params[2]
        Ps<- params[3]
        return( sum( (y-Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps))^2))
    } """)
    min_adp = r("""
    min_ad<-function(params){
        alpha<-params[1]
        Beta<-params[2]
        Ps<-params[3]
        return( ( (Ps*(1-exp(-alpha*x/Ps)) *exp(-Beta*x/Ps)) ) )
    }""")
    r('etr_sim<-optim(par=ini, fn=platt)')
    r('p_alpha<-etr_sim$par[1]')
    r('p_Beta<-etr_sim$par[2]')
    r('p_Ps2<-etr_sim$par[3]')
    r('''
        if (p_Beta==0 | p_Beta<0){
            p_etrmax<-p_Ps2
        }else {
            p_etrmax<-p_Ps2*(p_alpha/(p_alpha+p_Beta))*
            (p_Beta/(p_alpha+p_Beta))^(p_Beta/p_alpha)
        }

        p_Ek<-p_etrmax/p_alpha
    ''')

    iniR = r('etr_sim$par')
    opts = np.append(opts, r('min_ad(par = etr_sim$par)'))
    cpars = r('as.data.frame(cbind(p_alpha, p_Ek, p_etrmax))')
    pars = [cpars['p_alpha'], cpars['p_Ek'], cpars['p_etrmax']]

    return iniR, opts, pars
Exemple #18
0
def eilers_peeters(light, etr, ini=None):
    """
    Adjust a best fit curve to ExP curves, according to Eilers  & Peters
    Model.

    Parameters
    ----------
    light : arr
        Generally PAR values. Where Photosynthetic Active Radiance
        interfer on Primary Production. 'light' is this parameter.

    etr : arr
        Eletron Transference Rate, by means relative ETR, obtained from
        Rapid Light Curves.

    ini : None
        Initial values values to set the curve.
        To insert initial values, they must be a list
        of values of initial parameters (a,b,c) of Eilers-Peeters models
    Return
    ------
    iniR : arr
        Initial values modeled, with R `optim` function.
    opts : arr
        Values optimized
    params : arr
        Curve Parameters (alpha, Ek, ETR_max)

    See Also
    --------
    P.H.C. Eilers and J.C.H Peeters. 1988. A model for the relationship
    between the light intensity and the rate of photosynthesis in
    phytoplankton. Ecol. Model. 42:199-215.

    #TODO
    ## Implement minimisation in Python.
    ## It's not very clear how to apply `nls2` in Python.
    ## minimize from a list of initial values.

    ##a = varis[0]
    ##b = varis[1]
    ##c = varis[2]
    #a = mini['a']
    #b = mini['b']
    #c = mini['c']

    #opts = (light/(a*(light**2)+(b*light)+c))
    #ad = fmin(ep_minimize,varis,args=(light,etr))

    #alpha = (1./ad[2])
    #etrmax = 1./(ad[1]+2*(ad[0]*ad[2])**0.5)
    #Eopt = (ad[2]/ad[0])**0.5
    #Ek = etrmax/alpha

    #params = [alpha, Ek, etrmax, Eopt]
    """
    r('library(nls2)')
    r.assign("x", light[~np.isnan(light)])
    r.assign("y", etr[~np.isnan(etr)])
    r('dat<-as.data.frame(cbind(x,y))')
    r('names(dat)<-c("light","etr")')

    if ini is None:
        r('''grid<-expand.grid(list(a=seq(1e-07,9e-06,by=2e-07),
        b=seq(-0.002,0.006,by=0.002),c=seq(-6,6,by=2)))''')
        mini = r('''
            mini<-coefficients(nls2(etr~light/(a*light^2+b*light+c),
            data=dat, start=grid, algorithm="brute-force"))
            ''')
    else:
        mini = ini
        r.assign("mini", mini)

    r('''ep<-nls(etr~light/(a*light^2+b*light+c),data=dat,
    start=list(a=mini[1],b=mini[2],c=mini[3]),
    lower = list(0,-Inf,-Inf), trace=FALSE,
    algorithm = "port", nls.control("maxiter"=100000, tol=0.15))

    a2<-summary(ep)$coefficients[1]
    b2<-summary(ep)$coefficients[2]
    c2<-summary(ep)$coefficients[3]

    alpha<-(1/c2)
    etrmax<-1/(b2+2*(a2*c2)^0.5)
    Eopt<-(c2/a2)^0.5
    Ek<-etrmax/alpha''')

    iniR = mini
    alpha = r('alpha')
    Ek = r('Ek')
    etr_max = r('etrmax')
    params = [alpha, Ek, etr_max]
    opts = r('opts<-fitted(ep)')

    return iniR, opts, params
Exemple #19
0
def etr_plot(curves, indir='.', subplot=False):
    '''
    Relative Electron Transference Rate plots.

    INPUT
    -----
    curves : list of curves
    indir : directory to save plots. Default current directory
    subplot : Default False.

    OUTPUT
    ------
    Return =     
    Figure saved (png)
    '''
    light = []
    etr = []
    opts = []
    x = []
    y = []

    if type(curves) != list:
        raise TypeError('variable "curves", must be a list')

    for ii,cur in enumerate(curves):
        light.append(np.float64(cur['PAR']))
        etr.append(np.float64(cur['ETR1'])/1000.)
        etr.append(np.float64(cur['ETR2'])/1000.)
        etr.append(np.float64(cur['ETR3'])/1000.)
        etr.append(np.float64(cur['ETR4'])/1000.)
        for value in etr:
            r.assign("x",light[0])
            r.assign("y",value)
            min_platt = r("""
            platt<- function(params){
                alpha<-params[1]
                Beta<- params[2]
                Ps<- params[3]
                return( sum( (y-Ps*(1-exp(-alpha*x/Ps))*exp(-Beta*x/Ps))^2))
            } """)
            min_adp = r("""
            min_ad<-function(params){ 
                alpha<-params[1]
                Beta<-params[2]
                Ps<-params[3]
                return( ( (Ps*(1-exp(-alpha*x/Ps)) *exp(-Beta*x/Ps)) ))
            }""")
            r('etr_sim<-optim(par=c(0.4, 1.5 , 80),fn=platt)')
            opts.append(r('min_ad(par = etr_sim$par)'))

        if subplot==True:
            #plt.subplot(6,5,cur+1)
            for i in xrange(len(opts)):
                #plt.figure(figsize=(8, 6), dpi=80)
                plt.subplot(6,5,ii+1)
                plt.subplots_adjust(wspace=0.2, hspace=0.7) #adjust spaces between subplots
                plt.plot(light[0],etr[i], 'o', color=str(colors[i]))
                plt.ylim(0, 100)
                plt.xlabel('Light (PAR)')
                plt.ylabel(u'(rETR)')
                plt.plot(np.sort(light[0]), np.sort(opts[i]), color=str(colors[i]), label=str(compr[i]))
                plt.legend(loc='upper left', prop={'size':7})  #markerscale=0.1, borderpad=0.1, labelspacing=0.1, mpl.font_manager.FontManager(size=10))#,ncol=4,prop=font_manager.FontProperties(size=10))
                plt.title(str(cur['Date'][0]+' '+cur['Time'][0]))
            light = []
            etr = []
            e_sim = []
            opts = []
            x = []
            y = []
    if subplot==True:
        plt.savefig(os.path.join(indir + str(cur['Date'][0]+'_'+ cur['Time'][0] + '.png')))
        
    plt.show()
        
    return opts
Exemple #20
0
def doFDR(pvalues, 
          vlambda=numpy.arange(0,0.95,0.05), 
          pi0_method="smoother", 
          fdr_level=None, 
          robust=False,
          smooth_df = 3,
          smooth_log_pi0 = False):
    """modeled after code taken from http://genomics.princeton.edu/storeylab/qvalue/linux.html.

    I did not like the error handling so I translated most to python.
    
    Compute FDR after method by Storey et al. (2002).
    """

    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError( "p-values out of range" )

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(" If length of vlambda greater than 1, you need at least 4 values." )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError( "vlambda must be within [0, 1).")

    m = len(pvalues)

     # these next few functions are the various ways to estimate pi0
    if len(vlambda)==1: 
        vlambda = vlambda[0]
        if  vlambda < 0 or vlambda >=1 :
            raise ValueError( "vlambda must be within [0, 1).")

        pi0 = numpy.mean( [ x >= vlambda for x in pvalues ] ) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign( "pi0", pi0)
    else:
        pi0 = numpy.zeros( len(vlambda), numpy.float )

        for i in range( len(vlambda) ):
            pi0[i] = numpy.mean( [x >= vlambda[i] for x in pvalues ]) / (1.0 -vlambda[i] )

        R.assign( "pi0", pi0)
        R.assign( "vlambda", vlambda)

        if pi0_method=="smoother":
            if smooth_log_pi0:
                pi0 = math.log(pi0)
                
            R.assign( "smooth_df", smooth_df)

            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")
            if smooth_log_pi0:
                pi0 = math.exp(pi0)

        elif pi0_method=="bootstrap":

            minpi0 = min(pi0)

            mse = numpy.zeros( len(vlambda), numpy.float )
            pi0_boot = numpy.zeros( len(vlambda), numpy.float )

            R.assign( "pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) 
            {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) 
                {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError( "'pi0_method' must be one of 'smoother' or 'bootstrap'.")

        pi0 = min(pi0,1.0)
        R.assign( "pi0", pi0 )

    if pi0 <= 0:
        raise ValueError( "The estimated pi0 <= 0. Check that you have valid p-values or use another vlambda method." )

    if fdr_level != None and (fdr_level <= 0 or fdr_level > 1):
        raise ValueError( "'fdr_level' must be within (0, 1].")

    # The estimated q-values calculated here
    #u = numpy.argsort( p )

    # change by Alan
    # ranking function which returns number of observations less than or equal
    R.assign( "pvalues", pvalues )
    R.assign( "robust", robust )
    qvalues = R("""u <- order(pvalues)
    qvalues.rank <- function(x) 
{
      idx <- sort.list(x)

      fc <- factor(x)
      nl <- length(levels(fc))
      bin <- as.integer(fc)
      tbl <- tabulate(bin)
      cs <- cumsum(tbl)

      tbl <- rep(cs, tbl)
      tbl[idx] <- tbl

      return(tbl)
}

v <- qvalues.rank(pvalues)
m <- length(pvalues)
qvalues <- pi0 * m * pvalues / v

if(robust) 
{
        qvalues <- pi0*m*pvalues/(v*(1-(1-pvalues)^m))
}
qvalues[u[m]] <- min(qvalues[u[m]],1)

for(i in (m-1):1) 
{
   qvalues[u[i]] <- min(qvalues[u[i]],qvalues[u[i+1]],1)
}
qvalues
""")

    result = FDRResult()
    result.mQValues = qvalues

    if fdr_level != None:
        result.mPassed = [ x <= fdr_level for x in result.mQValues ]
    else:
        result.mPassed = [ False for x in result.mQValues ]
        
    result.mPValues = pvalues
    result.mPi0 = pi0
    result.mLambda = vlambda
    
    return result
Exemple #21
0
def getPi0( pvalues, 
            vlambda=numpy.arange(0,0.95,0.05), 
            pi0_method="smoother",
            smooth_df = 3,
            smooth_log_pi0 = False):
    '''used within nubiscan.'''
    
    if min(pvalues) < 0 or max(pvalues) > 1:
        raise ValueError( "p-values out of range" )

    if len(vlambda) > 1 and len(vlambda) < 4:
        raise ValueError(" If length of vlambda greater than 1, you need at least 4 values." )

    if len(vlambda) > 1 and (min(vlambda) < 0 or max(vlambda) >= 1):
        raise ValueError( "vlambda must be within [0, 1).")

    m = len(pvalues)

     # these next few functions are the various ways to estimate pi0
    if len(vlambda)==1: 
        vlambda = vlambda[0]
        if  vlambda < 0 or vlambda >=1 :
            raise ValueError( "vlambda must be within [0, 1).")

        pi0 = numpy.mean( [ x >= vlambda for x in pvalues ] ) / (1.0 - vlambda)
        pi0 = min(pi0, 1.0)
        R.assign( "pi0", pi0)
        
    else:
        
        pi0 = numpy.zeros( len(vlambda), numpy.float )

        for i in range( len(vlambda) ):
            pi0[i] = numpy.mean( [x >= vlambda[i] for x in pvalues ]) / (1.0 -vlambda[i] )

        R.assign( "pi0", pi0)
        R.assign( "vlambda", vlambda)

        if pi0_method=="smoother":
            if smooth_log_pi0:
                pi0 = math.log(pi0)
                
            R.assign( "smooth_df", smooth_df)

            spi0 = R("""spi0 <- smooth.spline(vlambda,pi0, df = smooth_df)""")
            pi0 = R("""pi0 <- predict( spi0, x = max(vlambda) )$y""")

            if smooth_log_pi0:
                pi0 = math.exp(pi0)

        elif pi0_method=="bootstrap":

            minpi0 = min(pi0)

            mse = numpy.zeros( len(vlambda), numpy.float )
            pi0_boot = numpy.zeros( len(vlambda), numpy.float )

            R.assign( "pvalues", pvalues)
            pi0 = R("""
            m <- length(pvalues)
            minpi0 <- min(pi0)
            mse <- rep(0,length(vlambda))
            pi0_boot <- rep(0,length(vlambda))
            for(i in 1:100) 
            {
                pvalues_boot <- sample(pvalues,size=m,replace=TRUE)
                for(i in 1:length(vlambda)) 
                {
                    pi0_boot[i] <- mean(pvalues_boot>vlambda[i])/(1-vlambda[i])
                }
                mse <- mse + (pi0_boot-minpi0)^2
            }
            pi0 <- min(pi0[mse==min(mse)])""")
        else:
            raise ValueError( "'pi0_method' must be one of 'smoother' or 'bootstrap'.")

        pi0 = min(pi0,1.0)

    if pi0 <= 0:
        raise ValueError( "The estimated pi0 <= 0. Check that you have valid p-values or use another vlambda method." )

    return pi0