Example no. 1
def bootdensity(data, min, max, nboot, ci):
    """ Calculate density and confidence intervals on density
    for a 1D array of points.  Bandwidth is selected automatically.
    """
    r("""
      limdensity <- function(data, weights=NULL, bw="nrd0")
      {
        density(data, from=%f, to=%f, weights=weights, bw=bw)
      }
      """%(min, max))
    density = r.limdensity(data)
    xdens = N.array(density['x'])
    ydens = N.array(density['y'])
    bw = density['bw']
    #print 'bandwidth:', bw
    ydensboot = N.zeros((nboot, len(xdens)), N.float)
    ndata = len(data)
    ran = N.random.uniform(0, ndata, (nboot,ndata)).astype(N.int)
    for i in range(nboot):
        den = r.limdensity(data[ran[i]])
        y = N.array(den['y'])
        ydensboot[i] = y
    ydensbootsort = N.sort(ydensboot, axis=0)
    ydensbootsort = interp1d(N.arange(0, 1.000001, 1.0/(nboot-1)),
                             ydensbootsort, axis=0)
    ilow = (0.5-ci/2.0)
    ihigh = (0.5+ci/2.0)
    ydenslow, ydenshigh = ydensbootsort((ilow, ihigh))
    ydenslow = gaussian_filter1d(ydenslow, bw*512/10.0)
    ydenshigh = gaussian_filter1d(ydenshigh, bw*512/10.0)
    return xdens, ydens, ydenslow, ydenshigh, bw
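The function above assumes `r` from rpy, `N` (numpy), `interp1d` from scipy.interpolate, and `gaussian_filter1d` from scipy.ndimage are already in scope. A minimal usage sketch, with made-up sample data:

import numpy as N
from rpy import r
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter1d

# 1000 draws from a unit normal; estimate the density on [-4, 4]
# with 200 bootstrap resamples and a 68% confidence band
data = N.random.normal(0.0, 1.0, 1000)
xdens, ydens, ydenslow, ydenshigh, bw = bootdensity(data, -4.0, 4.0, 200, 0.68)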
Example no. 2
 def testHMM_paral(self):
     server_url = self.server_url
     self.get(server_url + "/",
         description="Get /")
     self.post(server_url + "/cgi-bin/adacghR.cgi", params=[
         ['acghData', Upload("empty.txt")],
         ['positionInfo', Upload("empty.txt")],
         ['twofiles', 'One.file'],
         ['acghAndPosition', Upload("two.sample.shuffled.num.test")],
         ['centering', 'None'],
         ['methodaCGH', 'HMM'],
         ['Wave.minDiff', '0.25'],
         ['Wave.merge', 'Yes'],
         ['PSW.nIter', '1000'],
         ['PSW.p.crit', '0.15'],
         ['ACE.fdr', '0.15'],
         ['MCR.gapAllowed', '500'],
         ['MCR.alteredLow', '0.03'],
         ['MCR.alteredHigh', '0.97'],
         ['MCR.recurrence', '75'],
         ['organism', 'None'],
         ['idtype', 'None']],
         description="CBS; numerical of parallel")
     final_output = 'Segmented data plots'
     common_part(self, final_output)
     url_before_get = self.getLastUrl()
     urlretrieve(server_url +
                 url_before_get.replace('results.html', 'HMM.output.txt'),
                 filename = 'HMM.web.output.txt')
     import rpy ## to verify numerical output
     print '##########  @@@@@@@@@@@@@@   Testing   HMM'
     tmp = rpy.r('source("test-num.R")')
     tmp = rpy.r('test.hmm()')
     print tmp
Example no. 3
    def plotNumLegend(self, colVecL, breakL, nb_breaks, filename=None, legendDir=None, type='png', int_labels=False):

        if filename is None:
            filename = 'legend_%i_%i_%i' % (len(colVecL), min(breakL), max(breakL))

        if legendDir is None:
            legendDir = self.legendDir
            
        full_filename = os.path.join(legendDir, filename)

        max_break = max(breakL)
        min_break = min(breakL)
        tickBreakL = [float(x) / (nb_breaks - 1) * (max_break - min_break) + min_break for x in range(nb_breaks)]
        if int_labels:
            labels = ['%i' % int(x) for x in tickBreakL]
        else:
            labels = ['%.3f' % x for x in tickBreakL]
        
        rdev = plot_utilities.RDevice(name = full_filename, title='', plotType=type, 
            width=640, height=120)
        r("par(mar=c(3,1,0,1), las=2)")
        #legendA = rpy.reshape(breakL, (len(breakL), 1))
        legendA = r.matrix(breakL, byrow=False, ncol=1)
        r.image(legendA, 1, legendA, col=colVecL, axes=False, ann=False)
        r.box()
        r.axis(1, at=tickBreakL, labels=labels, tick=True, line=0, cex=0.8, cex_axis=0.8)
        rdev.close()
    
        return
Example no. 4
 def Monocle(self):
     print 'Loading monocle package in R'
     print_out = r('library("monocle")')
     if "Error" in print_out:
         print 'Installing the R package "monocle" in Config/R'
         print_out = r('source("http://bioconductor.org/biocLite.R"); biocLite("monocle")')
         print print_out
     print_out = r('library("monocle")')
Example no. 5
def make_L(data,direction='S',z=None,):
    """ Define the along track distance from one reference

        direction define the cardinal direction priority (N,S,W or E).
         S means that the reference will be the southern most point

        z define the bathymetry, if defined, the closest point to that
         bathymetry will be the reference. In case of cross this bathymetry
         more than once, the direction criteria is used to distinguish.
    """
    from fluid.common.distance import distance
    all_cycles_data = join_cycles(data)

    if z is None:
        import rpy
        #for t in topex.invert_keys(data):
        for t in all_cycles_data:
            rpy.set_default_mode(rpy.NO_CONVERSION)
            linear_model = rpy.r.lm(rpy.r("y ~ x"), data = rpy.r.data_frame(x=all_cycles_data[t]['Longitude'], y=all_cycles_data[t]['Latitude']))
            rpy.set_default_mode(rpy.BASIC_CONVERSION)
            coef=rpy.r.coef(linear_model)
            if direction=='S':
                lat0=all_cycles_data[t]['Latitude'].min()-1
                lon0 = (lat0-coef['(Intercept)'])/coef['x']
                L_correction = distance(all_cycles_data[t]['Latitude'],all_cycles_data[t]['Longitude'],lat0,lon0).min()
            for c in invert_keys(data)[t]:
                data[c][t]['L'] = distance(data[c][t]['Latitude'],data[c][t]['Longitude'],lat0,lon0)- L_correction
    # This bathymetric method was only copied from an old code. It should at
    #  least be changed, if not removed.
    else:  # z was given; the original tested an undefined `method` variable here
        import rpy
        for t in all_cycles_data:
            # First define the near coast values.
            idSouth=numpy.argmin(all_cycles_data[t]['Latitude'])
            L_tmp = distance(all_cycles_data[t]['Latitude'],all_cycles_data[t]['Longitude'],all_cycles_data[t]['Latitude'][idSouth],all_cycles_data[t]['Longitude'][idSouth])
            idNearCoast = L_tmp.data<400e3
            if min(all_cycles_data[t]['Bathy'][idNearCoast]) > -z:
                idNearCoast = L_tmp.data<600e3
            # Then calculate the distance to a reference
            rpy.set_default_mode(rpy.NO_CONVERSION)
            linear_model = rpy.r.lm(rpy.r("y ~ x"), data = rpy.r.data_frame(x=all_cycles_data[t]['Longitude'], y=all_cycles_data[t]['Latitude']))
            rpy.set_default_mode(rpy.BASIC_CONVERSION)
            coef=rpy.r.coef(linear_model)
            lat0 = all_cycles_data[t]['Latitude'].min()-1
            lon0 = (lat0-coef['(Intercept)'])/coef['x']
            #L = distance(,lon,lat0,lon0)
            #
            #id0 = numpy.argmin(numpy.absolute(all_cycles_data[t]['Bathy'][idNearCoast]))
            idref=numpy.argmin(numpy.absolute(all_cycles_data[t]['Bathy'][idNearCoast]+z))
            #L_correction = distance(all_cycles_data[t]['Latitude'][idNearCoast][idref],all_cycles_data[t]['Longitude'][idNearCoast][idref],all_cycles_data[t]['Latitude'][idNearCoast][idref],all_cycles_data[t]['Longitude'][idNearCoast][idref])
            L_correction = distance(all_cycles_data[t]['Latitude'][idNearCoast][idref],all_cycles_data[t]['Longitude'][idNearCoast][idref],lat0,lon0)
            for c in topex.invert_keys(data)[t]:
                #data[c][t]['L'] = distance(data[c][t]['Latitude'],data[c][t]['Longitude'],all_cycles_data[t]['Latitude'][idNearCoast][id0],all_cycles_data[t]['Longitude'][idNearCoast][id0]) - L_correction
                data[c][t]['L'] = distance(data[c][t]['Latitude'],data[c][t]['Longitude'],lat0,lon0) - L_correction
    #
    return
Example no. 6
def get_by_index(spss_data_file, gene_index_1, gene_index_2, trait_index_1, trait_index_2):
    r("data = read.spss(file='"+spss_data_file+"',to.data.frame=TRUE)")
    gene_index = str(gene_index_1)+":"+ str(gene_index_2)
    genes = r("names(data["+gene_index+"])")
    trait_index = str(trait_index_1)+":"+ str(trait_index_2)
    traits = r("names(data["+trait_index+"])")
    #print genes
    #print traits

    return genes, traits
Example no. 7
 def __init__(self, x, y, **kwargs):
     self.nvar = 1 if x.ndim==1 else x.shape[1]
     assert (x.ndim==1 and x.size==y.size) or (x.ndim==2 and x.shape[0]==y.size), "X and Y inputs must have same number of rows"
     assert (self.nvar < 5), "Maximum number of predictors is 4"
     df = with_mode(NO_CONVERSION, r.data_frame)(x=x,y=y.flatten())
     if x.ndim==1:
         model = r("y ~ x")
     else:
         model = r("y ~ " + ' + '.join('x.%d' % (i+1) for i in range(4)))
     self.smoother = with_mode(NO_CONVERSION, r.loess)(model, data=df, **kwargs)
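With the `range(self.nvar)` correction above, the formula string handed to R names one term per predictor column; a quick standalone check of what gets built:

nvar = 2  # e.g. a two-column x
model_str = "y ~ " + ' + '.join('x.%d' % (i + 1) for i in range(nvar))
print model_str  # prints: y ~ x.1 + x.2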
Example no. 8
 def Monocle(self):
     print 'Loading monocle package in R'
     print_out = r('library("monocle")')
     if "Error" in print_out:
         print 'Installing the R package "monocle" in Config/R'
         print_out = r(
             'source("http://bioconductor.org/biocLite.R"); biocLite("monocle")'
         )
         print print_out
     print_out = r('library("monocle")')
Example no. 9
def read_directory(sub_dir):
    dir=os.path.dirname(__file__) 
    #print "Working Directory:", r('getwd()')
    working_dir = dir+'/'+sub_dir[1:]
    setwd = 'setwd("%s")' % working_dir
    r(setwd)
    #print "Working Directory:", r('getwd()')
    
    dir_list = os.listdir(dir +'/'+ sub_dir[1:]); dir_list2 = []
    for entry in dir_list: #add in code to prevent folder names from being included
        if entry[-4:] == ".txt" or entry[-4:] == ".csv": dir_list2.append(entry)
    return dir_list2
Example no. 10
def ALS(s, thresh=.001, nonnegS=True, nonnegC=True):
    """Alternate least squares
    
    Wrapper around R's ALS package
    
    Parameters
    ----------
    s : Spectrum instance
    thresh : float
        convergence criteria
    nonnegS : bool
        if True, impose non-negativity constraint on the components
    nonnegC : bool
        if True, impose non-negativity constraint on the maps
    
    Returns
    -------
    Dictionary   
    """
    import_rpy()
    #    Format
    #    ic format (channels, components)
    #    W format (experiment, components)
    #    s format (experiment, channels)

    nonnegS = 'TRUE' if nonnegS is True else 'FALSE'
    nonnegC = 'TRUE' if nonnegC is True else 'FALSE'
    print "Non negative constraint in the sources: ", nonnegS
    print "Non negative constraint in the mixing matrix: ", nonnegC

    refold = unfold_if_2D(s)
    W = s._calculate_recmatrix().T
    ic = np.ones(s.ic.shape)
    rpy.r.library('ALS')
    rpy.r('W = NULL')
    rpy.r('ic = NULL')
    rpy.r('d1 = NULL')
    rpy.r['<-']('d1', s.data_cube.squeeze().T)
    rpy.r['<-']('W', W)
    rpy.r['<-']('ic', ic)
    i = 0
    # Workaround a bug in python rpy version 1
    while hasattr(rpy.r, 'test' + str(i)):
        rpy.r('test%s = NULL' % i)
        i += 1
    rpy.r('test%s = als(CList = list(W), thresh = %s, S = ic,\
     PsiList = list(d1), nonnegS = %s, nonnegC = %s)' %
          (i, thresh, nonnegS, nonnegC))
    if refold:
        s.fold()
    als_result = getattr(rpy.r, 'test%s' % i)  # fetch the R object created above
    return als_result
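The `rpy.r['<-'](name, value)` calls above are the classic-rpy idiom for copying a Python array into the R global environment under a given name; a self-contained sketch:

import rpy
import numpy as np

rpy.r['<-']('v', np.arange(5.0))  # R now holds v = c(0, 1, 2, 3, 4)
print rpy.r('sum(v)')             # evaluated in R; prints 10.0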
Example no. 11
def ALS(s, thresh =.001, nonnegS = True, nonnegC = True):
    """Alternate least squares

    Wrapper around R's ALS package

    Parameters
    ----------
    s : Spectrum instance
    thresh : float
        convergence criteria
    nonnegS : bool
        if True, impose non-negativity constraint on the components
    nonnegC : bool
        if True, impose non-negativity constraint on the maps

    Returns
    -------
    Dictionary
    """
    import_rpy()
#    Format
#    ic format (channels, components)
#    W format (experiment, components)
#    s format (experiment, channels)

    nonnegS = 'TRUE' if nonnegS is True else 'FALSE'
    nonnegC = 'TRUE' if nonnegC is True else 'FALSE'
    print "Non negative constraint in the sources: ", nonnegS
    print "Non negative constraint in the mixing matrix: ", nonnegC

    refold = unfold_if_2D(s)
    W = s._calculate_recmatrix().T
    ic = np.ones(s.ic.shape)
    rpy.r.library('ALS')
    rpy.r('W = NULL')
    rpy.r('ic = NULL')
    rpy.r('d1 = NULL')
    rpy.r['<-']('d1', s.data_cube.squeeze().T)
    rpy.r['<-']('W', W)
    rpy.r['<-']('ic', ic)
    i = 0
    # Workaround a bug in python rpy version 1
    while hasattr(rpy.r, 'test' + str(i)):
        rpy.r('test%s = NULL' % i)
        i+=1
    rpy.r('test%s = als(CList = list(W), thresh = %s, S = ic,\
     PsiList = list(d1), nonnegS = %s, nonnegC = %s)' %
     (i, thresh, nonnegS, nonnegC))
    if refold:
        s.fold()
    als_result = getattr(rpy.r, 'test%s' % i)  # fetch the R object created above
    return als_result
Example no. 12
 def r_to_str(robj):
     "Returns an R object in a representation as a list of strings."
     from rpy import r
     from tempfile import mktemp
     tmpfile = mktemp()
     #logging.info('Tmpfile: %s' % tmpfile)
     try:
         r.assign('tmpobj', robj)
         r('save(tmpobj, file="%s", ascii=TRUE)' % tmpfile)
         return open(tmpfile).read()
     finally:
         if os.access(tmpfile, os.R_OK):
             os.remove(tmpfile)
Example no. 13
 def r_to_str(robj):
     "Returns an R object in a representation as a list of strings."
     from rpy import r
     from tempfile import mktemp
     tmpfile = mktemp()
     #logging.info('Tmpfile: %s' % tmpfile)
     try:
         r.assign('tmpobj', robj)
         r('save(tmpobj, file="%s", ascii=TRUE)' % tmpfile)
         return open(tmpfile).read()
     finally:
         if os.access(tmpfile, os.R_OK):
             os.remove(tmpfile)
Example no. 14
def read_directory(sub_dir):
    dir = os.path.dirname(__file__)
    #print "Working Directory:", r('getwd()')
    working_dir = dir + '/' + sub_dir[1:]
    setwd = 'setwd("%s")' % working_dir
    r(setwd)
    #print "Working Directory:", r('getwd()')

    dir_list = os.listdir(dir + '/' + sub_dir[1:])
    dir_list2 = []
    for entry in dir_list:  #add in code to prevent folder names from being included
        if entry[-4:] == ".txt" or entry[-4:] == ".csv":
            dir_list2.append(entry)
    return dir_list2
Example no. 15
 def __init__(self, x, y, **kwargs):
     self.nvar = 1 if x.ndim == 1 else x.shape[1]
     assert (x.ndim == 1 and x.size == y.size) or (
         x.ndim == 2 and x.shape[0]
         == y.size), "X and Y inputs must have same number of rows"
     assert (self.nvar < 5), "Maximum number of predictors is 4"
     df = with_mode(NO_CONVERSION, r.data_frame)(x=x, y=y.flatten())
     if x.ndim == 1:
         model = r("y ~ x")
     else:
         model = r("y ~ " + ' + '.join('x.%d' % (i + 1) for i in range(4)))
     self.smoother = with_mode(NO_CONVERSION, r.loess)(model,
                                                       data=df,
                                                       **kwargs)
Example no. 16
def plot_entry_list(log_entry_list, binning=None, binning_dict=None, bin_widths=None, bin_widths_dict=None,
                    support_dict=None, xlab=None, ylab="", normalize_log_space=True, xmin=None, xmax=None,
                    main=None, bin_numbers=True):
    entries = convert_log_entries(log_entry_list)

    points = []

    for entry in entries:
        number, fullname, tarray = entry
        this_ylab = ylab

        # Decide on which binning to use
        if binning_dict!=None and binning_dict.has_key(number):
            bins = binning_dict[number][2]
        elif binning!=None and len(binning)==1 and len(binning[0][2])-1==len(tarray):
            bins = binning[0][2]
        else:
            bins = None

        # Normalize by bin width, if the bin width is available
        if bin_widths_dict!=None and bin_widths_dict.has_key(number):
            widths = bin_widths_dict[number][2]
        elif bin_widths!=None and len(bin_widths)==1 and len(bin_widths[0][2])==len(tarray):
            widths = bin_widths[0][2]
        else:
            widths = None

        if widths!=None:
            if normalize_log_space:
                tarray -= log(widths)
                this_ylab = r("expression(%s - ln(Delta[bin]))" % ylab)
            else:
                tarray = tarray/widths  # Note, /= won't work, since tarray might be an int array
                this_ylab = r("expression(%s / Delta[bin])" % ylab)

        # Decide on which support to use
        if support_dict!=None and support_dict.has_key(number):
            support = support_dict[number][2]
        else:
            support = None

        # Do the plotting
        p = plot_name_array(fullname, tarray, 1, counts=False, bins=bins, support=support, makenewplot=True,
                            xlab=xlab, ylab=this_ylab, xmin=xmin, xmax=xmax,
                            main=main, bin_numbers=bin_numbers)

        points.append((number, fullname, p))

    return points
Example no. 17
def chisq_test(*samples):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    data = transpose(samples)
    cmd = "chisq.test(data.frame(t=c(%s),f=c(%s)))" % (cjoin(data[0]), cjoin(data[1]))
    return rpy.r(cmd)["p.value"]
Example no. 18
File: ea.py Project: YuJinhui/kobas
def chisq(m1, n1, m2, n2, **kargs):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    cmd = "chisq.test(matrix(c(%d, %d, %d, %d), nc=2))" \
          % (m1, n1-m1, m2, n2-m2)
    return rpy.r(cmd)['p.value']
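R fills matrices column-wise, so the call lays the four counts out one sample per column; for chisq(4, 10, 6, 20) the table passed to chisq.test is:

# matrix(c(4, 6, 6, 14), nc=2):
#           sample 1   sample 2
# hits          4          6
# misses        6         14
# i.e. each column is (m, n-m) for one sample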
Example no. 19
def read_data_old(spss_data_file):
    r("library('foreign')")
    data_sheet = r("read.spss(file='"+spss_data_file+"',to.data.frame=TRUE)")
    return data_sheet
Example no. 20
def chisq(m1, n1, m2, n2, **kargs):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    cmd = "chisq.test(matrix(c(%d, %d, %d, %d), nc=2))" \
          % (m1, n1-m1, m2, n2-m2)
    return rpy.r(cmd)['p.value']
Example no. 21
def _generate_simplex_trajectories(r, p, mins, maxs, h):
	x = np.zeros((r * (p + 1), p)) 
	
	simpf = rpy.r("sensitivity:::random.simplexes")
	minsr = rpy.r.FloatVector(mins)
	maxsr = rpy.r.FloatVector(maxs)
	x[:,:] = simpf(p, r, minsr, maxsr, h)
	return x.reshape((r, p+1, p))
Example no. 22
def verify_FishersExact(data_name):
    """ Verify output from Fisher's exact test for IxJ contingency tables."""
    print "\n\n\n******* Verifying Fisher's test results \n"
    rpy.r('load("' + FILES_DIR + 'fisher.verified.RData")')
    fisherConnect = flstandalone.NumTesting()
    fisherConnect.setUp('http://pomelo2.bioinfo.cnio.es')
    fisherConnect.send_get_pomelo(FILES_DIR + data_name + '.data.txt',
                                  FILES_DIR + data_name + '.labels.txt',
                                  FILES_DIR + 'empty.txt',
                                  'FisherIxJ', '2')
    ## time.sleep(50)
    r_read = 'fisherPomelo <- readPomeloOutput()'
    r_compare = 'comparePomelo(fisherPomelo, fisher.pv, fisher = TRUE)'
    rpy.r(r_read)
    out_comparison = rpy.r(r_compare)
    if (out_comparison == 'ERROR: test failed'):
        raise AsteriasAssertionError
Example no. 23
def fisher(m1, n1, m2, n2, **kargs):
    """do chi-square test on contingency table
        samples [(n, m),...]
        """
    alternative = kargs.get('alternative', 'greater')
    cmd = 'fisher.test(matrix(c(%d, %d, %d, %d), nc=2), alternative="%s")' \
          % (m1, n1-m1, m2, n2-m2, alternative)
    return rpy.r(cmd)['p.value']
Example no. 24
File: ea.py Project: YuJinhui/kobas
def fisher(m1, n1, m2, n2, **kargs):
        """do chi-square test on contingency table
        samples [(n, m),...]
        """
        alternative = kargs.get('alternative', 'greater')
        cmd = 'fisher.test(matrix(c(%d, %d, %d, %d), nc=2), alternative="%s")' \
              % (m1, n1-m1, m2, n2-m2, alternative)
        return rpy.r(cmd)['p.value']
Example no. 25
    def pure_linear_model_via_R(cls, non_NA_genotype_ls, non_NA_phenotype_ls, non_NA_phenotype2count=None):
        """
		2010-2-25
			use createDesignMatrix() to generate a design matrix
		2009-8-28
			split out of pure_linear_model(). same functionality as pure_linear_model(), but invoke R to run regression.
		"""

        genotype_matrix = cls.createDesignMatrix(non_NA_genotype_ls)
        # 2008-11-10 do linear regression by R
        genotype_var = numpy.var(genotype_matrix[:, 0])  # 2008-11-10 var=\sum(x_i-\bar{x})^2/(n-1)
        rpy.set_default_mode(rpy.NO_CONVERSION)  # 04-07-05
        # data_frame = rpy.r.as_data_frame({"phenotype":non_NA_phenotype_ls, "genotype":rpy.r.as_factor(genotype_matrix[:,1])})
        formula_list = []
        data_frame_dict = {"phenotype": non_NA_phenotype_ls}
        for i in range(genotype_matrix.shape[1]):
            var_name = "genotype%s" % i
            formula_list.append(var_name)
            data_frame_dict.update({var_name: genotype_matrix[:, i]})
        data_frame = rpy.r.as_data_frame(data_frame_dict)
        formula = "phenotype~%s" % "+".join(formula_list)

        if non_NA_phenotype2count and len(non_NA_phenotype2count) == 2:  # binary phenotype, use logistic regression
            lm_result = rpy.r.glm(rpy.r(formula), data=data_frame, family=rpy.r("binomial"))
        else:
            lm_result = rpy.r.glm(rpy.r(formula), data=data_frame)
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        # 04-07-05 r.summary() requires lm_result in NO_CONVERSION state
        summary_stat = rpy.r.summary(lm_result)

        # 06-30-05	index 0 in summary_stat['coefficients'] is intercept
        coeff_list = []
        coeff_p_value_list = []
        for i in range(len(summary_stat["coefficients"])):
            coeff_list.append(summary_stat["coefficients"][i][0])  # 0 is the coefficient
            coeff_p_value_list.append(summary_stat["coefficients"][i][-1])  # -1 is the corresponding p-value
            # 06-30-05	fill in other efficients based on bit_string, NOTE i+1
        pvalue = coeff_p_value_list[1]
        residuals = summary_stat["deviance"]
        no_of_rows = len(non_NA_phenotype_ls)  # was undefined in the original; the sample size is the length of the phenotype vector
        geno_effect_var = genotype_var * coeff_list[1] * coeff_list[1] * (no_of_rows - 1)
        var_perc = geno_effect_var / (residuals + geno_effect_var)

        pdata = PassingData(
            pvalue=pvalue, var_perc=var_perc, coeff_list=coeff_list, coeff_p_value_list=coeff_p_value_list
        )
        return pdata
Example no. 26
 def lm(self, l, h):
     for i in range(l, h + 1):
         data_frame, data_model = self.mount_reg_params(i)
         print data_model
         rpy.set_default_mode(rpy.NO_CONVERSION)
         linear_model = r.lm(r(data_model), data=data_frame)
         rpy.set_default_mode(rpy.BASIC_CONVERSION)
         print r.summary(linear_model)['r.squared']
Example no. 27
def chisq_test(*samples):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    data = transpose(samples)
    cmd = "chisq.test(data.frame(t=c(%s),f=c(%s)))" \
          % (cjoin(data[0]), cjoin(data[1]))
    return rpy.r(cmd)['p.value']
Example no. 28
def platt_opts(light, params):
    """
    Adjust `opt` values of PAR levels following the Platt model.

    Parameters
    ----------
    light : arr
        Generally PAR (Photosynthetically Active Radiation) values, the
        light levels that drive primary production.
    params: arr
        Containing values of (alpha, Beta, etrmax).

    Returns
    -------
    opts : arr
        Values optimized according to `params` and the list of PAR levels.
    """
    opts = []

    r.assign("light", light[~np.isnan(light)])
    r.assign("params", params)
    # if opt == None:
    #     r.assign("opt", light[~np.isnan(light)])
    # else:
    #     r.assign("opt", opt[~np.isnan(opt)])

    # if ini == None:
    #     r.assign('ini', [0.4,1.5,1500])

    # else:
    #     r.assign('ini', np.array(ini))

    # op, platt_param = platt(light,etr, ini=ini)
    # r.assign('platt_param', platt_param)

    min_opt = r("""
    min_opt<-function(light,params){
        alpha<-params[1]
        Beta<-params[2]
        Ps<-params[3]
        return( ( (Ps*(1-exp(-alpha*light/Ps)) *exp(-Beta*light/Ps)) ) )
    }""")

    opts = np.append(opts, r('min_opt(light, params)'))

    return opts
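For reference, the R helper encodes the Platt et al. photosynthesis-irradiance model; in the code's notation, for light level I,

    P(I) = Ps * (1 - exp(-alpha*I/Ps)) * exp(-Beta*I/Ps)

where alpha is the initial slope, Beta the photoinhibition parameter, and Ps the scaling parameter (params[1..3] in the 1-based R function, params[0..2] on the Python side).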
Example no. 29
def regression(data):
    """Calls R's lm to make a linear regression on each of its inputs."""

    reg = r.lm(r('x ~ y'),
            data = r.data_frame(x=data[:,0], y=data[:,1])
            )['coefficients']

    return reg
Example no. 30
def plot_bin_widths(entries, log_space=False, main=None):
    for entry in entries:
        number, fullname, tarray = entry

        if log_space:
            tarray = numpy.log(tarray)
            ylab = r("expression(ln(Delta[bin]))")
        else:
            ylab = r("expression(Delta[bin])")

        if main is None:
            this_main = fullname
        else:
            this_main = main
        
        if len(tarray)>0:
            r.plot(range(len(tarray)), tarray, xlab="bin number", ylab=ylab, main=this_main)
Example no. 31
def anova2(values,
           factor1,
           factor2,
           factor1name="factor1",
           factor2name="factor2",
           interaction=True):
    """ python wrapper for a two-way anova in R with optional interaction term ( default=True ) """
    # build a dataframe for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    dataframe["factor2"] = factor2
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    r("df$factor2 <- factor( df$factor2 )")
    # run the model
    results = r("anova( lm( df$feature ~ df$factor1 %s df$factor2 ) )" %
                ("*" if interaction else "+"))
    r("rm( list=ls() )")
    # convert R results to table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, factor2name]
    rowheads += ["int term", "error"] if interaction else ["error"]
    ndictData = {}
    for rowhead in results.keys():
        for index, name in zip(range(len(rowheads)), rowheads):
            dictName = ndictData.setdefault(name, {})
            dictName[rowhead] = results[rowhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
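A usage sketch with made-up data (it assumes the same `r` from rpy and the `nesteddict2table` helper the function relies on): a 2x2 design with two replicates per cell:

values  = [3.1, 2.9, 5.0, 5.2, 3.3, 3.0, 7.1, 6.8]
factor1 = ["a", "a", "a", "a", "b", "b", "b", "b"]
factor2 = ["x", "x", "y", "y", "x", "x", "y", "y"]
table = anova2(values, factor1, factor2,
               factor1name="geno", factor2name="treat", interaction=True)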
Example no. 32
def binom(m1, n1, m2, n2, **kargs):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    p = float(m2) / n2
    alternative = kargs.get('alternative', 'greater')
    cmd = 'binom.test(c(%d, %d), p=%f, alternative="%s")' \
          % (m1, n1-m1, p, alternative)
    return rpy.r(cmd)['p.value']
Example no. 33
def model_mean_and_variance(meanvar_ests):
    """Regression models of mean and var as functions of major allele len.
    NOTE: since alleles are already normalized to the major allele (e.g.,
    allele len=0 is the major allele), we're modeling error of the means
    and variances.
    LATER: Use several regression formulae to see how things look and choose
    the best fit?  For now, the log regression seems (simply by eye) to be
    the better fit."""
    # Weights are just the number of observed sites for each major allele
    weights = meanvar_ests['count']
    adjmean = [ na(mean) for mean in meanvar_ests['mean'] ]
    adjvar = [ na(var) for var in meanvar_ests['var'] ]
    lmdata = rpy.r.data_frame(major=meanvar_ests['major'], mean=adjmean, var=adjvar)
    meanmodel = rpy.r.lm(rpy.r("mean ~ log(major)"), data=lmdata,
        weights=weights)
    varmodel = rpy.r.lm(rpy.r("var ~ log(major)"), data=lmdata,
        weights=weights)
    return meanmodel, varmodel
Example no. 34
File: ea.py Project: YuJinhui/kobas
def binom(m1, n1, m2, n2, **kargs):
    """do chi-square test on contingency table
    samples [(n, m),...]
    """
    p = float(m2)/n2
    alternative = kargs.get('alternative', 'greater')
    cmd = 'binom.test(c(%d, %d), p=%f, alternative="%s")' \
          % (m1, n1-m1, p, alternative)
    return rpy.r(cmd)['p.value']
Example no. 35
	def rpart_fit_and_predict(self, all_data, known_data, rpart_cp, loss_matrix, prior_prob, bit_string='11111'):
		"""
		11-09-05
			1st use known_data to get the fit model
			2nd use the fit model to do prediction on all_data, result is prob for each class
		11-09-05 add rpart_cp
		11-17-05
			add loss_matrix, prior_prob
			return two pred
		"""
		sys.stderr.write("rpart fitting and predicting...\n")
		r.library("rpart")
		coeff_name_list = ['p_value', 'recurrence', 'connectivity', 'cluster_size', 'gradient']
		formula_list = []
		for i in range(len(bit_string)):
			if bit_string[i] == '1':
				formula_list.append(coeff_name_list[i])
		#11-17-05 transform into array
		all_data = array(all_data)
		known_data = array(known_data)
		
		set_default_mode(NO_CONVERSION)
		data_frame = r.as_data_frame({"p_value":known_data[:,0], "recurrence":known_data[:,1], "connectivity":known_data[:,2], \
			"cluster_size":known_data[:,3], "gradient":known_data[:,4], "is_correct":known_data[:,-1]})
		if prior_prob:
			prior_prob = [prior_prob, 1-prior_prob]	#get the full list
			fit = r.rpart(r("is_correct~%s"%'+'.join(formula_list)), data=data_frame, method="class", control=r.rpart_control(cp=rpart_cp),\
				parms=r.list(prior=prior_prob, loss=r.matrix(loss_matrix) ) )
		else:
			fit = r.rpart(r("is_correct~%s"%'+'.join(formula_list)), data=data_frame, method="class", control=r.rpart_control(cp=rpart_cp),\
				parms=r.list(loss=r.matrix(loss_matrix) ) )
		
		set_default_mode(BASIC_CONVERSION)
		pred_training = r.predict(fit, data_frame, type=["class"])
		del data_frame
		
		set_default_mode(NO_CONVERSION)
		all_data_frame = r.as_data_frame({"p_value":all_data[:,0], "recurrence":all_data[:,1], "connectivity":all_data[:,2], \
			"cluster_size":all_data[:,3], "gradient":all_data[:,4], "is_correct":all_data[:,-1]})
		set_default_mode(BASIC_CONVERSION)
		pred = r.predict(fit, all_data_frame, type=["class"])	#11-17-05 type=c("class")
		del all_data_frame
		sys.stderr.write("Done rpart fitting and predicting.\n")
		return pred, pred_training
Example no. 36
    def plotLabelLegend(self, colVecL, labelPhenoD, filename=None, legendDir=None, type='png'):

        if filename is None:
            filename = 'legend_label_%i' % len(labelPhenoD.keys())
        if legendDir is None:
            legendDir = self.legendDir
            
        full_filename = os.path.join(legendDir, filename)

        labelL = labelPhenoD.keys()
        labelL.sort()
        phenoL = [labelPhenoD[x] for x in labelL]
        
        rdev = plot_utilities.RDevice(name = full_filename, title='', plotType=type, 
            width=300, height=200)
        r("par(mar=c(0,0,0,0))")
        r.plot([1, 2, 3], type="n", xlab="", ylab="", main="", ann=False, axes=False)
        r.legend(x="center", legend = phenoL, fill=colVecL, bg = "white", bty="n", cex=0.7)
        rdev.close()
Example no. 37
 def Multtest(self,test_type):
     r('library("multtest")')
     filename = self.File()
     try: output_file = string.replace(filename,'input','output')
     except ValueError: output_file = filename[0:-4]+'-output.txt'
     print "Begining to process",filename
     parse_line = 'job<-read.table("%s",sep="\t", row.names=1, as.is=T)' % filename  # filename must be quoted inside the R call
     print_out = r(parse_line)
     print_out = r('matrix_size<-dim(job)')
     print_out = r('label<-job[1,2:matrix_size[2]]')
     print_out = r('jobdata<-job[2:matrix_size[1],2:matrix_size[2]]')
     if test_type == "f":
         print_out = r('ttest<-mt.maxT(jobdata,label, test="f", B=50000)')
     if test_type == "t":
         print_out = r('ttest<-mt.maxT(jobdata,label)')
     print_out = r('ttest2<-ttest[order(ttest[,1]),]')
     write_file = 'write.table(ttest2,"%s",sep="\t")' % output_file  # likewise quoted
     print_out = r(write_file)
     print "Results written to:",output_file 
Example no. 38
    def randomForest_fit(self, known_data, parameter_list, bit_string="1111111"):
        """
		03-17-06
		2006-10-302006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off
		"""
        if self.debug:
            sys.stderr.write("Fitting randomForest...\n")
        mty = parameter_list[0]

        from rpy import r

        r._libPaths(
            os.path.join(lib_path, "R")
        )  # better than r.library("randomForest", lib_loc=os.path.join(lib_path, "R")) (see plone doc)
        r.library("randomForest")

        coeff_name_list = [
            "p_value",
            "recurrence",
            "connectivity",
            "cluster_size",
            "gradient",
            "avg_degree",
            "unknown_ratio",
        ]  # 2006-10-30
        formula_list = []
        for i in range(len(bit_string)):
            if bit_string[i] == "1":
                formula_list.append(coeff_name_list[i])
        formula = r("is_correct~%s" % "+".join(formula_list))

        known_data = array(known_data)
        set_default_mode(NO_CONVERSION)
        data_frame = r.as_data_frame(
            {
                "p_value": known_data[:, 0],
                "recurrence": known_data[:, 1],
                "connectivity": known_data[:, 2],
                "cluster_size": known_data[:, 3],
                "gradient": known_data[:, 4],
                "avg_degree": known_data[:, 5],
                "unknown_ratio": known_data[:, 6],
                "is_correct": r.factor(known_data[:, -1]),
            }
        )  # 03-17-06, watch r.factor	#2006-10-30

        if mty > 0:
            # randomForest's argument is spelled `mtry`; the original passed `mty`
            fit = r.randomForest(formula, data=data_frame, mtry=mty)
        else:
            fit = r.randomForest(formula, data=data_frame)

        del data_frame
        if self.debug:
            sys.stderr.write("Done fitting randomForest.\n")
        return fit
Example no. 39
def verify_Cox(data_name):
    ''' Launch Cox in PomeloII, get results, and verify against R.'''
    print '\n\n\n******* Verifying Cox results for data set ' + data_name + '\n'
    coxConnect = flstandalone.NumTesting()
    coxConnect.setUp('http://pomelo2.bioinfo.cnio.es')
    coxConnect.send_get_pomelo(FILES_DIR + data_name + '.covar.txt',
                              FILES_DIR + data_name + '.surv.txt',
                              FILES_DIR + data_name + '.event.txt',
                              'Cox', '2')
    r_read = data_name + 'Pomelo <- readPomeloOutput()'
    r_compare = 'comparePomelo(' + data_name + 'Pomelo, ' + \
                     data_name + '.results)'
    ## r_read and r_compare are so that we can
    ## send to R the following two types of instructions
    ## rpy.r('breastPomelo <- readPomeloOutput()')
    ## rpy.r('comparePomelo(breastPomelo, breast.results)')
    rpy.r(r_read)
    out_comparison = rpy.r(r_compare)
    if (out_comparison == 'ERROR: test failed'):
        raise AsteriasAssertionError
Example no. 40
def interpolazionelineare(x, y):
    rpy.set_default_mode(rpy.NO_CONVERSION)  # needed for the final step done in R
    linear_model = rpy.r.lm(rpy.r("y ~ x"), data=rpy.r.data_frame(x=x, y=y))
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    summary = rpy.r.summary(linear_model)
    # (intercept, intercept error, slope, slope error): row 0 of R's
    # coefficient table is the intercept, row 1 the slope
    risultati = (summary['coefficients'][0][0], \
                    summary['coefficients'][0][1], \
                    summary['coefficients'][1][0], \
                    summary['coefficients'][1][1])
    return risultati
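A short usage sketch (made-up points lying near y = 2x + 1); note the return order follows R's coefficient table, intercept row first:

x = [0.0, 1.0, 2.0, 3.0, 4.0]
y = [1.1, 2.9, 5.2, 6.9, 9.0]
intercept, intercept_err, slope, slope_err = interpolazionelineare(x, y)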
Example no. 41
def wavelet_gaussian_denoising(spectrum):
    """Denoise data with pure Gaussian noise using wavelets

    Wrapper around the R packages EbayesThresh and wavethresh

    Parameters
    ----------
    spectrum : spectrum instance

    Returns
    -------
    Spectrum instance.
    """
    import_rpy()
    rpy.r.library('EbayesThresh')
    rpy.r.library('wavethresh')
    rpy.r['<-']('X',spectrum)
    rpy.r('Xwd  <- wd(X, bc="symmetric")')
    rpy.r('XwdT  <- ebayesthresh.wavelet(Xwd)')
    Xdn = rpy.r('Xdn  <- wr(XwdT)')
    return Xdn
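A usage sketch; wavethresh's wd() expects a dyadic (power-of-two) length, so the made-up signal below has 512 samples:

import numpy as np
spectrum = np.sin(np.linspace(0.0, 6.28, 512)) + np.random.normal(0.0, 0.1, 512)
denoised = wavelet_gaussian_denoising(spectrum)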
Example no. 42
def wavelet_gaussian_denoising(spectrum):
    """Denoise data with pure Gaussian noise using wavelets
    
    Wrapper around the R packages EbayesThresh and wavethresh
    
    Parameters
    ----------
    spectrum : spectrum instance
    
    Returns
    -------
    Spectrum instance.
    """
    import_rpy()
    rpy.r.library('EbayesThresh')
    rpy.r.library('wavethresh')
    rpy.r['<-']('X', spectrum)
    rpy.r('Xwd  <- wd(X, bc="symmetric")')
    rpy.r('XwdT  <- ebayesthresh.wavelet(Xwd)')
    Xdn = rpy.r('Xdn  <- wr(XwdT)')
    return Xdn
Example no. 43
def anova(values, factor1, factor1name="factor1"):
    """ python wrapper for a one-way ANOVA in R """
    # build a dataframe for R
    dataframe = {}
    dataframe["feature"] = values
    dataframe["factor1"] = factor1
    r.assign("df", dataframe)
    r("df$factor1 <- factor( df$factor1 )")
    # run the model
    results = r("anova( lm( df$feature ~ df$factor1 ) )")
    r("rm( list=ls() )")
    # convert R results to table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, "error"]
    ndictData = {}
    for rowhead in results.keys():
        for index, name in zip(range(len(rowheads)), rowheads):
            dictName = ndictData.setdefault(name, {})
            dictName[rowhead] = results[rowhead][index]
    # return as zopy table
    return nesteddict2table(ndictData, rowheads, colheads)
Example no. 44
 def Multtest(self, test_type):
     r('library("multtest")')
     filename = self.File()
     try:
         output_file = string.replace(filename, 'input', 'output')
     except ValueError:
         output_file = filename[0:-4] + '-output.txt'
     print "Begining to process", filename
     parse_line = 'job<-read.table("%s",sep="\t", row.names=1, as.is=T)' % filename  # filename must be quoted inside the R call
     print_out = r(parse_line)
     print_out = r('matrix_size<-dim(job)')
     print_out = r('label<-job[1,2:matrix_size[2]]')
     print_out = r('jobdata<-job[2:matrix_size[1],2:matrix_size[2]]')
     if test_type == "f":
         print_out = r('ttest<-mt.maxT(jobdata,label, test="f", B=50000)')
     if test_type == "t":
         print_out = r('ttest<-mt.maxT(jobdata,label)')
     print_out = r('ttest2<-ttest[order(ttest[,1]),]')
     write_file = 'write.table(ttest2,"%s",sep="\t")' % output_file  # likewise quoted
     print_out = r(write_file)
     print "Results written to:", output_file
Example no. 45
	def calibrate(self):
	
		"""
		Performs a calibration based on the available datapoints.				
		"""
		
		from rpy import r
	
		if len(self.pts) < 2:
			return False

		in_x = []
		in_y = []
		in_z = []
		out_x = []
		out_y = []
		out_z = []

		# Index all points so they can be fed into the R multiple linear regression
		for in_pt, out_pt in self.pts:
			in_x.append(in_pt[0])
			in_y.append(in_pt[1])
			in_z.append(in_pt[2])
			out_x.append(out_pt[0])
			out_y.append(out_pt[1])
			out_z.append(out_pt[2])
		
		# Perform the regression analysis
		fx = r.lm(r("x ~ a + b + c"), data = r.data_frame(a=in_x, b=in_y, c=in_z, x=out_x))["coefficients"]
		fy = r.lm(r("y ~ a + b + c"), data = r.data_frame(a=in_x, b=in_y, c=in_z, y=out_y))["coefficients"]
		fz = r.lm(r("z ~ a + b + c"), data = r.data_frame(a=in_x, b=in_y, c=in_z, z=out_z))["coefficients"]		
	
		self.fx = fx["(Intercept)"], fx["a"], fx["b"], fx["c"]
		self.fy = fy["(Intercept)"], fy["a"], fy["b"], fy["c"]
		self.fz = fz["(Intercept)"], fz["a"], fz["b"], fz["c"]
								
		self.calibrated = True
		
		return True		
Example no. 46
def spss_to_csv(spss_name, csv_name=None):
    """Convert spss file to csv file"""

    if csv_name is None:
        basename = os.path.splitext(spss_name)[0]
        csv_name = basename + ".csv"

    rpy.r("library(foreign)")
    data = rpy.r("read.spss('{0}')".format(spss_name))
    keys = data.keys()

    for key, values in data.items():
        types = set([type(val) for val in values])
        if len(types) != 1:
            sys.exit("ERROR: multiple types for {0}: {1}".format(key, types))
        first = values[0]

        if isinstance(first, float):
            # replace NaN with None
            values = [None if math.isnan(val) else val for val in values]
            # convert SPSS date columns; to_dates is assumed to leave
            # non-date columns untouched
            values = to_dates(values, key)
            # collapse integer-valued floats to int (this step was
            # unreachable in the original because the float check
            # was repeated three times)
            values = [int(val) if isinstance(val, float) and val.is_integer()
                      else val for val in values]
        data[key] = values

    row = 0
    with open(csv_name, "w") as fobj:
        writer = csv.writer(fobj)
        writer.writerow(keys)
        while True:
            try:
                line = [data[key][row] for key in keys]
                writer.writerow(line)
            except IndexError:
                break
            row += 1
Example no. 47
 def interpolazionelineare(self, other):
         """x.interpolazionelineare(y) esegue l'i.l. con x in ascissa e y in ordinata.
         x e y devono essere due oggetti della classe DatiSperimentali."""
         rpy.set_default_mode(rpy.NO_CONVERSION)
         linear_model = rpy.r.lm(rpy.r("y ~ x"), data = rpy.r.data_frame(x=self.valori, y=other.valori))
         rpy.set_default_mode(rpy.BASIC_CONVERSION)
         summary = rpy.r.summary(linear_model)
          # (intercept, intercept error, slope, slope error): row 0 of R's
          # coefficient table is the intercept, row 1 the slope
         risultati = (summary['coefficients'][0][0], \
                     summary['coefficients'][0][1], \
                     summary['coefficients'][1][0], \
                     summary['coefficients'][1][1])
         return risultati
Example no. 48
 def interpolazionelineare(self, other):
     rpy.set_default_mode(rpy.NO_CONVERSION)
     linear_model = rpy.r.lm(rpy.r("y ~ x"),
                             data=rpy.r.data_frame(x=self.valori,
                                                   y=other.valori))
     rpy.set_default_mode(rpy.BASIC_CONVERSION)
     summary = rpy.r.summary(linear_model)
      # (intercept, intercept error, slope, slope error): row 0 of R's
      # coefficient table is the intercept, row 1 the slope
     risultati = (summary['coefficients'][0][0], \
                 summary['coefficients'][0][1], \
                 summary['coefficients'][1][0], \
                 summary['coefficients'][1][1])
     return risultati
Example no. 49
def LinearRegression_lm(ls1,ls2,return_rsqrd):
    intercept = 0 ### when forced through the origin
    from rpy import r
    d = r.data_frame(x=ls1, y=ls2)
    model = r("y ~ x - 1") ### when not forced through the origin it is r("y ~ x")
    fitted_model = r.lm(model, data = d)
    slope = fitted_model['coefficients']['x']
    #intercept = fitted_model['coefficients']['(Intercept)']
    if return_rsqrd == 'yes':
        from scipy import stats
        rsqrd = math.pow(stats.linregress(ls1,ls2)[2],2)
        return slope,rsqrd
    else: return slope
Example no. 50
def loess(y, x=None, span=0.2):
    """locally weighted scatterplot smoothing
    
    Wrapper around the R funcion loess
    
    Parameters
    ----------
    spectrum : spectrum instance
    span : float
        parameter to control the smoothing
    
    Returns
    -------
    Spectrum instance.
    """
    import_rpy()
    if x is None:
        x = np.arange(0, len(y))
    rpy.r['<-']('x', x)
    rpy.r['<-']('y', y)
    rpy.r('y.loess <- loess(y ~ x, span = %s, data.frame(x=x, y=y))' % span)
    loess = rpy.r('y.predict <- predict(y.loess, data.frame(x=x))')
    return loess
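A usage sketch with a made-up noisy signal (import_rpy() must already have made rpy available):

import numpy as np
y = np.cos(np.linspace(0.0, 10.0, 200)) + np.random.normal(0.0, 0.2, 200)
smoothed = loess(y, span=0.3)  # x defaults to 0..199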
Example no. 51
def LinearRegression(ls1,ls2,return_rsqrd):
    intercept = 0 ### when forced through the origin
    from rpy import r
    r.library('MASS')
    k = r.options(warn=-1) ### suppress all warning messages from R
    #print ls1; print ls2
    d = r.data_frame(x=ls1, y=ls2)
    model = r("y ~ x - 1") ### when not forced through the origin it is r("y ~ x")
    fitted_model = r.rlm(model, data = d) ###errors: rlm failed to converge in 20 steps - maxit=21
    slope = fitted_model['coefficients']['x']
    #intercept = fitted_model['coefficients']['(Intercept)']
    if return_rsqrd == 'yes':
        from scipy import stats
        rsqrd = math.pow(stats.linregress(ls1,ls2)[2],2)
        return slope,rsqrd
    else:
        return slope
Example no. 52
def wavelet_poissonian_denoising(spectrum):
    """Denoise data with pure Poissonian noise using wavelets
    
    Wrapper around the R packages EbayesThresh and wavethresh
    
    Parameters
    ----------
    spectrum : spectrum instance
    
    Returns
    -------
    Spectrum instance.
    """
    import_rpy()
    rpy.r.library('EbayesThresh')
    rpy.r.library('wavethresh')
    rpy.r['<-']('X', spectrum)
    rpy.r('XHF <- hft(X)')
    rpy.r('XHFwd  <- wd(XHF, bc="symmetric")')
    rpy.r('XHFwdT  <- ebayesthresh.wavelet(XHFwd)')
    rpy.r('XHFdn  <- wr(XHFwdT)')
    XHFest = rpy.r('XHFest <- hft.inv(XHFdn)')
    return XHFest
Example no. 53
def _generate_oat_trajectories(r, p, levels, jumps, mins, maxs):
	#Set up the R function and vectors
	oatf = rpy.r("sensitivity:::random.oat")
	levelsr = rpy.r.FloatVector([levels] * p)
	jumpsr = rpy.r.FloatVector([jumps] * p)

	#Create a list to hold the trajectories
	ts = []
	
	#Generate r trajectories consisting of p + 1 points
	for i in range(r):
		ts.append(io.Trajectory(points=[]))
		tsr = oatf(p , levelsr, jumpsr)
		for point in np.array(tsr):
			#Scale the point and add it to the trajectory
			scaled = [v * (mx - mn) + mn for v, mn, mx in zip(point, mins, maxs)]
			ts[-1].add_point(io.Point(scaled))
	
	return ts
Example no. 54
def fitPoly(xarray, yarray, order):

    r.lm.local_mode(rpy.NO_CONVERSION)

    xl=list(xarray)
    yl=list(yarray)
    
    modelDef = "y ~ poly(x,%d)" % order
    model=r.lm(r(modelDef), data=r.data_frame(x=xl,y=yl))
    
    pred=r.predict(model)

    # pred is now a dict with keys from '1' to 'N', where N is the size of xl

    predvals = []

    for i in range(len(xl)):
        predvals.append(pred[str(i+1)])
        
    return(xl, predvals)
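A usage sketch with made-up cubic data:

import numpy as np
x = np.linspace(-1.0, 1.0, 50)
y = x**3 - x + np.random.normal(0.0, 0.05, 50)
xl, predvals = fitPoly(x, y, 3)  # fit a 3rd-order polynomial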
Example no. 55
def smooth_data(data):
    sample_data=data[0]
    window_size=data[1]
    for rep_num in range(sample_data.get_number_of_replicates()):
        for chrom in sample_data.get_chromosome_list():
            met_manager = sample_data.get_manager_of_chrom(chrom)
            pos=[]
            m=[]
            cov=[]
            for methyl_c in met_manager:
                pos.append(methyl_c.position)
                m.append(methyl_c.get_methylrate(rep_num))
                cov.append(methyl_c.get_coverage(rep_num))
            r.warnings()
            r.library("locfit")
            r.assign("pos",pos)
            r.assign("m",m)
            r.assign("cov",cov)
            r.assign("h",window_size)
            r("posm=data.frame(pos,m)")
            r("fit=locfit(m~lp(pos,h=h),data=posm,maxk=1000000,weights=cov)")
            r("pp=preplot(fit,where='data',band='local',newdata=data.frame(pos=pos))")
            fit=r("pp")["fit"]
            list=r("unlist(pp$xev$xev)")
            for i, each in enumerate(list):
                position=int(each[0])
                methyl_c=met_manager.get_methyl_c(position)
                if methyl_c:
                    smoothedrate=None
                    if 1 <= fit[i]:
                        smoothedrate=1
                    elif fit[i] <= 0:
                        smoothedrate=0
                    else:
                        smoothedrate=fit[i]
                    methyl_c.update_methylrate(rep_num,smoothedrate)
                else:
                    sys.stderr.write("methyl_c doesn't exist at %d",position)
                    sys.exit(1)
Example no. 56
def read_data(spss_data_file):
    r("library('foreign')")

    r("data = read.spss(file='"+spss_data_file+"',to.data.frame=TRUE)")
    data_sheet = r("data")
    num_columns = len(data_sheet.keys())
    
    column_labels = []
    data_list = []
    for i in range(1,num_columns+1):
        column = r("data["+str(i)+"]")
        label = column.keys()[0] # only has one key 
        column_labels.append(label) 
        data_list.append(column[label])

    return data_list, column_labels