Python lm Beispiele, rpy.r.lm Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: ex2.py Projekt: franapoli/pyleaf

def regression(data):
    """Fit the R linear model ``x ~ y`` and return its coefficients.

    Column 0 of ``data`` is handed to R as ``x`` and column 1 as ``y``;
    the ``'coefficients'`` entry of the ``lm`` result is returned.
    """
    formula = r('x ~ y')
    frame = r.data_frame(x=data[:, 0], y=data[:, 1])
    fit = r.lm(formula, data=frame)
    return fit['coefficients']

Beispiel #2

0

Datei anzeigen

Datei: regression.rpy.py Projekt: danielmoraes/product-rank

 def lm(self, l, h):
     """Fit an R linear model for every index from ``l`` to ``h`` inclusive.

     For each index the data frame and model formula come from
     ``self.mount_reg_params``. The formula is printed, the model is fitted
     with ``r.lm`` and the ``'r.squared'`` entry of its R summary is printed.

     The fit runs with rpy conversion switched off so that ``r.lm`` hands
     back an R object that ``r.summary`` can consume.
     """
     for i in range(l, h + 1):
         data_frame, data_model = self.mount_reg_params(i)
         print(data_model)
         rpy.set_default_mode(rpy.NO_CONVERSION)
         try:
             linear_model = r.lm(r(data_model), data=data_frame)
         finally:
             # Restore the conversion mode even when the fit fails, so a
             # bad model does not leave rpy in NO_CONVERSION for callers.
             rpy.set_default_mode(rpy.BASIC_CONVERSION)
         print(r.summary(linear_model)['r.squared'])

Beispiel #3

0

Datei anzeigen

Datei: calibrate.py Projekt: smathot/mantra

	def calibrate(self):

		"""
		Fit the linear mapping from input points to output points.

		Each of the three output coordinates is regressed in R on the three
		input coordinates. The (intercept, a, b, c) coefficients of the fits
		are stored as self.fx, self.fy and self.fz, and self.calibrated is
		set to True.

		Returns False when fewer than two point pairs are stored in
		self.pts, True once the fits have been stored.
		"""

		from rpy import r

		if len(self.pts) < 2:
			return False

		# Transpose the (input, output) pairs into per-coordinate columns so
		# they can be fed into the R multiple linear regression
		sources = ([], [], [])
		targets = ([], [], [])
		for src, dst in self.pts:
			for axis in range(3):
				sources[axis].append(src[axis])
			for axis in range(3):
				targets[axis].append(dst[axis])

		def fit(name, observed):
			# Regress one output coordinate on the input coordinates a, b, c
			columns = {"a": sources[0], "b": sources[1], "c": sources[2], name: observed}
			formula = r("%s ~ a + b + c" % name)
			return r.lm(formula, data = r.data_frame(**columns))["coefficients"]

		# Run all three fits before touching self, so a failing fit leaves
		# the previously stored coefficients alone
		fx = fit("x", targets[0])
		fy = fit("y", targets[1])
		fz = fit("z", targets[2])

		terms = ("(Intercept)", "a", "b", "c")
		self.fx = tuple(fx[term] for term in terms)
		self.fy = tuple(fy[term] for term in terms)
		self.fz = tuple(fz[term] for term in terms)

		self.calibrated = True

		return True

Beispiel #4

0

Datei anzeigen

Datei: regresion.py Projekt: nesaro/driza

def funcion(dato, variable, caso, opciones):  # TODO: rename "cosa" to "caso"
    """Regress the second variable on the first with R's ``lm``.

    The values of ``variable[0]`` and ``variable[1]`` are fetched with
    ``dato.query(name, caso)`` and fitted as ``y ~ x`` (first variable is
    ``x``, second is ``y``). ``opciones`` is accepted but not used here.

    Returns a dict with the fitted model under ``"resultado"``, the result
    of ``summary_lm`` under ``"sumario"`` and of ``anova_lm`` under
    ``"anova"``.
    """
    from rpy import r  # pylint: disable=import-error

    x_name, y_name = variable[0], variable[1]
    x_values = dato.query(x_name, caso)
    y_values = dato.query(y_name, caso)

    fit = r.lm(r("y ~ x"), data=r.data_frame(x=x_values, y=y_values))
    return {
        "resultado": fit,
        "sumario": r.summary_lm(fit, True),
        "anova": r.anova_lm(fit),
    }

Beispiel #5

0

Datei anzeigen

Datei: statistics.py Projekt: kdaily/altanalyze

def LinearRegression_lm(ls1,ls2,return_rsqrd):
    """Return the slope of ``ls2`` regressed on ``ls1`` through the origin.

    The fit is done by R's ``lm`` with the formula ``y ~ x - 1`` (no
    intercept term), with ``ls1`` as ``x`` and ``ls2`` as ``y``.

    When ``return_rsqrd`` equals ``'yes'`` a ``(slope, rsqrd)`` tuple is
    returned instead, where ``rsqrd`` is the square of the third element of
    ``scipy.stats.linregress(ls1, ls2)``; it is computed separately from
    the R fit.
    """
    from rpy import r

    frame = r.data_frame(x=ls1, y=ls2)
    # "- 1" forces the line through the origin; r("y ~ x") would not
    formula = r("y ~ x - 1")
    coefficients = r.lm(formula, data=frame)['coefficients']
    slope = coefficients['x']

    if return_rsqrd != 'yes':
        return slope

    from scipy import stats
    correlation = stats.linregress(ls1, ls2)[2]
    return slope, math.pow(correlation, 2)

Beispiel #6

0

Datei anzeigen

def LinearRegression_lm(ls1,ls2,return_rsqrd):
    """Return the slope of ``ls2`` regressed on ``ls1`` through the origin.

    The fit is done by R's ``lm`` with the formula ``y ~ x - 1`` (no
    intercept term), with ``ls1`` as ``x`` and ``ls2`` as ``y``.

    When ``return_rsqrd`` equals ``'yes'`` a ``(slope, rsqrd)`` tuple is
    returned instead, where ``rsqrd`` is the square of the third element of
    ``scipy.stats.linregress(ls1, ls2)``; it is computed separately from
    the R fit.
    """
    from rpy import r

    frame = r.data_frame(x=ls1, y=ls2)
    # "- 1" forces the line through the origin; r("y ~ x") would not
    formula = r("y ~ x - 1")
    coefficients = r.lm(formula, data=frame)['coefficients']
    slope = coefficients['x']

    if return_rsqrd != 'yes':
        return slope

    from scipy import stats
    correlation = stats.linregress(ls1, ls2)[2]
    return slope, math.pow(correlation, 2)

Beispiel #7

0

Datei anzeigen

Datei: rpyd_server.py Projekt: hammer/rpyd

  def lm(self, Y, X):
    """Fit ``Y`` on the predictors in ``X`` with R's ``lm``.

    Every predictor contributes its ``data`` under its ``name``; the model
    formula is ``"<Y.name> ~ <name> + <name> + ..."``. Returns the
    ``'coefficients'`` entry of the fitted model.
    """
    # here's where we would handle factors
    observations = {predictor.name: predictor.data for predictor in X}

    # Wilkinson-Rogers notation; built before the response is added so
    # that only predictor names end up on the right-hand side
    response = Y.name
    terms = " + ".join(observations)
    wr_model = "%s ~ %s" % (response, terms)

    # the response column has to be part of the data handed to R as well
    observations[Y.name] = Y.data

    fitted = r.lm(r(wr_model), data = observations)
    return fitted['coefficients']

Beispiel #8

0

Datei anzeigen

Datei: fitPoly.py Projekt: provingground-moe/photocal_analysis

def fitPoly(xarray, yarray, order):
    """Fit ``y ~ poly(x, order)`` in R and return the fitted values.

    Returns a tuple ``(xl, predvals)``: ``xl`` is ``xarray`` as a list and
    ``predvals`` holds the model's prediction for each of those x values,
    in the same order.
    """
    # keep the lm result as an R object so it can be passed to predict
    r.lm.local_mode(rpy.NO_CONVERSION)

    xl = list(xarray)
    yl = list(yarray)

    formula = r("y ~ poly(x,%d)" % order)
    frame = r.data_frame(x=xl, y=yl)
    pred = r.predict(r.lm(formula, data=frame))

    # pred is keyed by the 1-based observation number as a string,
    # i.e. '1' to 'N' where N is the size of xl
    predvals = [pred[str(position)] for position in range(1, len(xl) + 1)]

    return (xl, predvals)

Beispiel #9

0

Datei anzeigen

Datei: combiner.py Projekt: Brian-Tomasik/Combine-Data-Sources-for-Semantic-Music-Discovery

 def _independent_betas_same_sources(self, tag_list, remove_tags_when_bad_regression, n_times_show_summary=3):
     """Fit one regression per tag and store the per-source coefficient statistics.

     For every tag in ``tag_list`` the model ``y~X-1`` is fitted on
     ``self.y[tag]`` / ``self.X[tag]`` with ``rc.lm`` or, for logistic
     regression, ``rc.glm`` with a binomial logit family, depending on
     ``self.regtype``. The R summary is passed to
     ``self._record_regression_stats`` and each row of its coefficient
     matrix is stored in ``self.beta[tag]`` as a dict keyed by the names in
     ``SUMMARY_STATS`` (plus ``"-log10(pval)"`` when it can be computed),
     under the matching entry of ``self.sorted_sources[tag]``.

     remove_tags_when_bad_regression: when true, a tag whose coefficient
         matrix has fewer rows than sources is removed via
         ``self._remove_tag`` and gets no entry in ``self.beta``.
     n_times_show_summary: upper bound on how many tags have their summary
         printed through ``self._print_regression_summary``.
     """
     times_showed_summary = 0 # This allows us to print out some summary statistics without producing an overwhelming amount of output.
     SUMMARY_STATS = ["beta", "stderr", "tstat", "pval"]
     for tag in tag_list:
         self._progress("Computing betas for tag %s." % tag, newline=True) # rmme: newline make false
         rpy.set_default_mode(rpy.NO_CONVERSION) # Turn off conversion so that lm returns Robj.
         data = rc.list(y=self.y[tag],X=self.X[tag])
         model = "y~X-1" # Use -1 because X has an intercept already
         # NOTE(review): if self.regtype matches neither branch, or the
         # debugger below is left without assigning it, `result` is unbound
         # when rc.summary is called -- confirm the allowed regtype values.
         if self.regtype=="Independent Linear":
             try:
                 result = rc.lm(model,data=data)
             except:
                 # Any failure of the fit drops into the interactive debugger.
                 pdb.set_trace()
         elif self.regtype=="Independent Logistic":
             result = rc.glm(model,family=rc.binomial("logit"),data=data)
         # NOTE(review): not reached if rc.glm raises, which would leave rpy in NO_CONVERSION.
         rpy.set_default_mode(rpy.BASIC_CONVERSION) # Return to normal conversion mode.
         summary = rc.summary(result,correlation=rc.TRUE)
         self._record_regression_stats(tag, summary)
         beta_dict = dict()
         sorted_sources = self.sorted_sources[tag]
         coeff_matrix = summary["coefficients"]
         # Row i of the coefficient matrix is paired with the i-th sorted source.
         for i in range(len(sorted_sources)):
             try:
                 cur_source_dict = dict(zip(SUMMARY_STATS,coeff_matrix[i,:]))
             except IndexError:
                 # The matrix has fewer rows than there are sources.
                 util.info("\tWARNING: Regression for %s didn't end up using all variables." % tag)
                 if remove_tags_when_bad_regression:
                     self._remove_tag(tag)
                     break # break from for-loop over sorted_sources; we don't continue out of the per-tag for loop until later when we check if tag is in self.features....
                 continue
             try:
                 cur_source_dict["-log10(pval)"] = -log(cur_source_dict["pval"], 10)
             except OverflowError:
                 # Leave the "-log10(pval)" entry out when it cannot be computed.
                 pass
             beta_dict[sorted_sources[i]] = cur_source_dict
         if tag not in self.features: # We've removed this tag a few lines above, so skip it.
             continue
         self.beta[tag] = beta_dict
         if times_showed_summary < n_times_show_summary:
             self._print_regression_summary(tag, summary)
             times_showed_summary += 1

Beispiel #10

0

Datei anzeigen

Datei: example_rpy.py Projekt: PaulGureghian1/Statsmodels

from __future__ import print_function
from statsmodels.compat.python import iterkeys
from rpy import r
import numpy as np
import statsmodels.api as sm

# Numbers of the example sections below that should be run.
examples = [1, 2]

# Example 1: linear model on the longley dataset, fitted through R's lm.
if 1 in examples:
    data = sm.datasets.longley.load(as_pandas=False)
    # add_constant appends (prepend=False) a constant column to the design
    y, x = data.endog, sm.add_constant(data.exog, prepend=False)
    # One formula term per design column, named x.1, x.2, ...
    # (presumably the names R gives the columns of the matrix passed as
    # `x` to data_frame -- TODO confirm)
    des_cols = ['x.%d' % (i + 1) for i in range(x.shape[1])]
    # "-1" drops R's own intercept; the design already has a constant column
    formula = r('y~%s-1' % '+'.join(des_cols))
    frame = r.data_frame(y=y, x=x)
    results = r.lm(formula, data=frame)
    print(list(iterkeys(results)))
    print(results['coefficients'])

# Example 2: binomial GLM on the star98 dataset, fitted through R's glm.
if 2 in examples:
    data2 = sm.datasets.star98.load(as_pandas=False)
    y2, x2 = data2.endog, sm.add_constant(data2.exog, prepend=False)
    import rpy
    # Turn the response into a proportion: first column over the row sum
    y2 = y2[:, 0] / y2.sum(axis=1)
    des_cols2 = ['x.%d' % (i + 1) for i in range(x2.shape[1])]
    formula2 = r('y~%s-1' % '+'.join(des_cols2))
    frame2 = r.data_frame(y=y2, x=x2)
    results2 = r.glm(formula2, data=frame2, family='binomial')
    # Coefficient values ordered by sorted coefficient name
    params_est = [
        results2['coefficients'][k] for k in sorted(results2['coefficients'])
    ]

Beispiel #11

0

Datei anzeigen

Datei: example_rpy.py Projekt: chrisjordansquire/statsmodels

'''

from rpy import r
import numpy as np
import scikits.statsmodels.api as sm


# Numbers of the example sections below that should be run.
examples = [1, 2]

# Example 1: linear model on the longley dataset, fitted through R's lm.
if 1 in examples:
    data = sm.datasets.longley.load()
    # the design matrix gets a constant column from add_constant
    y,x = data.endog, sm.add_constant(data.exog)
    # One formula term per design column, named x.1, x.2, ...
    # (presumably the names R gives the columns of the matrix passed as
    # `x` to data_frame -- TODO confirm)
    des_cols = ['x.%d' % (i+1) for i in range(x.shape[1])]
    # "-1" drops R's own intercept; the design already has a constant column
    formula = r('y~%s-1' % '+'.join(des_cols))
    frame = r.data_frame(y=y, x=x)
    results = r.lm(formula, data=frame)
    print results.keys()
    print results['coefficients']

# Example 2: binomial GLM on the star98 dataset, fitted through R's glm.
if 2 in examples:
    data2 = sm.datasets.star98.load()
    y2,x2 = data2.endog, sm.add_constant(data2.exog)
    import rpy
    # Turn the response into a proportion: first column over the row sum
    y2 = y2[:,0]/y2.sum(axis=1)
    des_cols2 = ['x.%d' % (i+1) for i in range(x2.shape[1])]
    formula2 = r('y~%s-1' % '+'.join(des_cols2))
    frame2 = r.data_frame(y=y2, x=x2)
    results2 = r.glm(formula2, data=frame2, family='binomial')
    # Coefficient values ordered by sorted coefficient name
    params_est = [results2['coefficients'][k] for k
                    in sorted(results2['coefficients'])]
    print params_est

Beispiel #12

0

Datei anzeigen

    for id in py_id:
        x1 = poly_x_vals[i,0]
        x2 = poly_x_vals[i,1]
        y1 = poly_y_vals[i,0]
        y2 = poly_y_vals[i,1]
        xy = poly_xy_vals[i]
        if poly_values: poly_values = poly_values + ","
        poly_values += "(%s, %f, %f, %f, %f, %f)" % (id, x1, x2, y1, y2, xy)
        i = i+1

    query = query + poly_values
#    print query
    c.execute(query)

    
    model = r.lm(r("delta ~ poly(x, 2) + poly(y, 2) + poly(x*y, 1)"), data=r.data_frame(x=py_x, y=py_y, delta=py_delta), weights=py_wt)
    model_summary = r.summary(model)
    model_coeff = array(model_summary['coefficients'])
    if not model_coeff.shape == (6,4):
        print "Bad model for %s" % exp
        continue
    
    c0 = model_coeff[0][0]
    c0_sigma = model_coeff[0][1]
    cx1 = model_coeff[1][0]
    cx1_sigma = model_coeff[1][1]
    cx2 = model_coeff[2][0]
    cx2_sigma = model_coeff[2][1]
    cy1 = model_coeff[3][0]
    cy1_sigma = model_coeff[3][1]
    cy2 = model_coeff[4][0]

Beispiel #13

0

Datei anzeigen

Datei: data2phylocontrasts.py Projekt: CGATOxford/Optic

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a tab-separated table from stdin: lines starting with ``#`` are
    skipped, the first remaining line is the header (its first field is
    dropped) and in every data row the first field is the taxon name.
    The table is handed to phylip's ``contrast`` program together with
    each tree read from ``--tree-nh-file``. For every requested
    ``--method`` a table is written to ``options.stdout``:

    * ``pearson`` / ``spearman``: for each pair of columns, phylip's
      correlation plus the p-value of an R regression through the origin
      on the contrasts. Both names run the same code.
    * ``contrasts``: the contrasts reported by phylip.
    * ``compute``: contrasts computed here by a depth-first traversal of
      the tree.

    Raises:
        ValueError: if no tree could be loaded.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t", "--tree-nh-file", dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header", dest="add_header", action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--output-with-header", dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug", dest="debug", action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree", dest="display_tree",
                      action="store_true",
                      help="display the tree")

    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson",
                               "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    # NOTE(review): argv is defaulted above but not forwarded to E.Start --
    # confirm whether E.Start should receive it.
    (options, args) = E.Start(parser, quiet=True)

    if options.columns not in ("all", "all-but-first"):
        # convert the 1-based, comma-separated column list to 0-based indices
        options.columns = [
            int(field) - 1 for field in options.columns.split(",")]

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setProgram("contrast")

    ##########################################################
    # retrieve data and give to phylip
    data = []
    headers = []
    first = True
    for line in sys.stdin:
        if line[0] == "#":
            continue
        d = line[:-1].strip().split("\t")
        if first:
            # header row: the first field labels the taxon column
            first = False
            headers = d[1:]
            continue
        data.append(d)

    phylip.setData(data)
    ncolumns = len(headers)
    nrows = len(data)

    ##########################################################
    # read trees
    nexus = None
    if options.filename_tree:
        nexus = TreeTools.Newick2Nexus(open(options.filename_tree, "r"))

    if not nexus:
        raise ValueError("please provide trees with branchlenghts")

    ##########################################################
    # set up phylip
    phylip_options = []
    # print out contrasts
    phylip_options.append("C")
    phylip_options.append("Y")
    phylip.setOptions(phylip_options)

    ##########################################################
    # main loop
    ##########################################################
    for tree in nexus.trees:

        if options.display_tree:
            tree.display()

        # compute this before giving the tree to the phylip module,
        # as it remaps taxon names.
        map_node2data = {}
        for x in range(nrows):
            taxon = data[x][0]
            map_node2data[tree.search_taxon(taxon)] = x

        phylip.setTree(tree)

        result = phylip.run()

        for method in options.methods:

            if method in ("pearson", "spearman"):

                options.stdout.write("header1\theader2\tr\tp\tcode\n")

                # one list per data column, holding that column's contrasts
                columns = []
                for c in range(ncolumns):
                    columns.append([row[c] for row in result.mContrasts])

                # every unordered pair of columns
                for x in range(0, ncolumns - 1):
                    for y in range(x + 1, ncolumns):

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        import rpy
                        from rpy import r as R

                        # Various ways to calculate r. It is not
                        # possible to use cor.test or lsfit directly,
                        # as you have to perform a regression through
                        # the origin.

                        # for significance, use linear regression models in R
                        rpy.set_default_mode(rpy.NO_CONVERSION)
                        try:
                            linear_model = R.lm(
                                R("y ~ x - 1"),
                                data=R.data_frame(x=columns[x],
                                                  y=columns[y]))
                        finally:
                            # restore conversion even if the fit fails, so
                            # later rpy calls are not left in NO_CONVERSION
                            rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        # extract the p-value
                        p = ss['coefficients'][-1][-1]

                        # significance stars
                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""

                        options.stdout.write("\t".join(
                            (headers[x], headers[y],
                             options.value_format % phy_r,
                             options.pvalue_format % p,
                             code)) + "\n")

            elif method == "contrasts":

                options.stdout.write("\t".join(headers) + "\n")
                for d in result.mContrasts:
                    options.stdout.write(
                        "\t".join(
                            options.value_format % value
                            for value in d) + "\n")

            elif method == "compute":

                # make room for all internal nodes and one dummy node
                # for unrooted trees.
                max_index = TreeTools.GetMaxIndex(tree) + 2
                variances = [None] * max_index
                # values and contrasts are indexed [node][column], so each
                # node needs one slot per data column (not per data row)
                values = [[None] * ncolumns for _ in range(max_index)]
                contrasts = [[None] * ncolumns for _ in range(max_index)]
                branchlengths = [None] * max_index

                def update_data(node_id, bl, c1, c2):
                    """store value, contrast and branch length of a node
                    computed from its children *c1* and *c2*."""

                    b1, b2 = branchlengths[c1], branchlengths[c2]
                    rb1 = 1.0 / b1
                    rb2 = 1.0 / b2
                    # compute variance
                    variance = math.sqrt(b1 + b2)

                    # extend branch length of this node to create correct
                    # variance for parent
                    branchlengths[node_id] = bl + (b1 * b2) / (b1 + b2)
                    variances[node_id] = variance

                    for c in range(ncolumns):
                        v1, v2 = values[c1][c], values[c2][c]
                        # save ancestral value as weighted mean
                        values[node_id][c] = (
                            (rb1 * v1 + rb2 * v2)) / (rb1 + rb2)
                        # compute normalized contrast
                        contrasts[node_id][c] = (v1 - v2) / variance

                def update_contrasts(node_id):
                    """update contrasts for a node."""
                    node = tree.node(node_id)
                    if node.succ:
                        if len(node.succ) == 2:
                            c1, c2 = node.succ
                            update_data(
                                node_id, node.data.branchlength, c1, c2)
                        else:
                            # trifurcating root: combine the first two
                            # children, then join that result with the
                            # third child in the dummy node
                            assert(node_id == tree.root)
                            assert(len(node.succ) == 3)
                            update_data(
                                node_id, node.data.branchlength,
                                node.succ[0], node.succ[1])
                            update_data(
                                max_index - 1, node.data.branchlength,
                                node_id, node.succ[2])
                    else:
                        # leaf: take the values from the input table
                        for c in range(ncolumns):
                            values[node_id][c] = float(
                                data[map_node2data[node_id]][c + 1])

                        branchlengths[node_id] = node.data.branchlength

                tree.dfs(tree.root, post_function=update_contrasts)

                options.stdout.write(
                    "node_id\tvariance\t%s\n" % "\t".join(headers))
                for node_id in range(max_index):
                    # nodes without a variance have no contrast (leaves)
                    if variances[node_id] is None:
                        continue
                    options.stdout.write("%s\t%s\t%s\n" % (
                        node_id,
                        options.value_format % variances[node_id],
                        "\t".join(
                            options.value_format % value
                            for value in contrasts[node_id]),
                    ))

    E.Stop()

Beispiel #14

0

Datei anzeigen

Datei: data2phylocontrasts.py Projekt: lesheng/cgat

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a tab-separated table from stdin: lines starting with ``#`` are
    skipped, the first remaining line is the header (its first field is
    dropped) and in every data row the first field is the taxon name.
    The table is handed to phylip's ``contrast`` program together with
    each tree read from ``--filename-tree``. For every requested
    ``--method`` a table is written to ``options.stdout``:

    * ``pearson`` / ``spearman``: for each pair of columns, phylip's
      correlation plus the p-value of an R regression through the origin
      on the contrasts. Both names run the same code.
    * ``contrasts``: the contrasts reported by phylip.
    * ``compute``: contrasts computed here by a depth-first traversal of
      the tree.

    Raises:
        ValueError: if no tree could be loaded.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: data2phylocontrasts.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header",
                      dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--write-header",
                      dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree",
                      dest="display_tree",
                      action="store_true",
                      help="display the tree")

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    # NOTE(review): argv is defaulted above but not forwarded to E.Start --
    # confirm whether E.Start should receive it.
    (options, args) = E.Start(parser, quiet=True)

    if options.columns not in ("all", "all-but-first"):
        # convert the 1-based, comma-separated column list to 0-based indices
        options.columns = [
            int(field) - 1 for field in options.columns.split(",")]

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setProgram("contrast")

    ##########################################################
    # retrieve data and give to phylip
    data = []
    headers = []
    first = True
    for line in sys.stdin:
        if line[0] == "#":
            continue
        d = line[:-1].strip().split("\t")
        if first:
            # header row: the first field labels the taxon column
            first = False
            headers = d[1:]
            continue
        data.append(d)

    phylip.setData(data)
    ncolumns = len(headers)
    nrows = len(data)

    ##########################################################
    # read trees
    nexus = None
    if options.filename_tree:
        nexus = TreeTools.Newick2Nexus(open(options.filename_tree, "r"))

    if not nexus:
        raise ValueError("please provide trees with branchlenghts")

    ##########################################################
    # set up phylip
    phylip_options = []
    # print out contrasts
    phylip_options.append("C")
    phylip_options.append("Y")
    phylip.setOptions(phylip_options)

    ##########################################################
    # main loop
    ##########################################################
    for tree in nexus.trees:

        if options.display_tree:
            tree.display()

        # compute this before giving the tree to the phylip module,
        # as it remaps taxon names.
        map_node2data = {}
        for x in range(nrows):
            taxon = data[x][0]
            map_node2data[tree.search_taxon(taxon)] = x

        phylip.setTree(tree)

        result = phylip.run()

        for method in options.methods:

            if method in ("pearson", "spearman"):

                options.stdout.write("header1\theader2\tr\tp\tcode\n")

                # one list per data column, holding that column's contrasts
                columns = []
                for c in range(ncolumns):
                    columns.append([row[c] for row in result.mContrasts])

                # every unordered pair of columns
                for x in range(0, ncolumns - 1):
                    for y in range(x + 1, ncolumns):

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        import rpy
                        from rpy import r as R

                        # Various ways to calculate r. It is not possible to use
                        # cor.test or lsfit directly, as you have to perform a
                        # regression through the origin.

                        # for significance, use linear regression models in R
                        rpy.set_default_mode(rpy.NO_CONVERSION)
                        try:
                            linear_model = R.lm(R("y ~ x - 1"),
                                                data=R.data_frame(x=columns[x],
                                                                  y=columns[y]))
                        finally:
                            # restore conversion even if the fit fails, so
                            # later rpy calls are not left in NO_CONVERSION
                            rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        # extract the p-value
                        p = ss['coefficients'][-1][-1]

                        # significance stars
                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""

                        options.stdout.write("\t".join(
                            (headers[x], headers[y], options.value_format %
                             phy_r, options.pvalue_format % p, code)) + "\n")

            elif method == "contrasts":

                options.stdout.write("\t".join(headers) + "\n")
                for d in result.mContrasts:
                    # rows end in a bare newline; a trailing space here
                    # would indent every following row of the table
                    options.stdout.write(
                        "\t".join(options.value_format % value
                                  for value in d) + "\n")

            elif method == "compute":

                # make room for all internal nodes and one dummy node
                # for unrooted trees.
                max_index = TreeTools.GetMaxIndex(tree) + 2
                variances = [None] * max_index
                # values and contrasts are indexed [node][column], so each
                # node needs one slot per data column (not per data row)
                values = [[None] * ncolumns for _ in range(max_index)]
                contrasts = [[None] * ncolumns for _ in range(max_index)]
                branchlengths = [None] * max_index

                def update_data(node_id, bl, c1, c2):
                    """store value, contrast and branch length of a node
                    computed from its children *c1* and *c2*."""

                    b1, b2 = branchlengths[c1], branchlengths[c2]
                    rb1 = 1.0 / b1
                    rb2 = 1.0 / b2
                    # compute variance
                    variance = math.sqrt(b1 + b2)

                    # extend branch length of this node to create correct
                    # variance for parent
                    branchlengths[node_id] = bl + (b1 * b2) / (b1 + b2)
                    variances[node_id] = variance

                    for c in range(ncolumns):
                        v1, v2 = values[c1][c], values[c2][c]
                        # save ancestral value as weighted mean
                        values[node_id][c] = (
                            (rb1 * v1 + rb2 * v2)) / (rb1 + rb2)
                        # compute normalized contrast
                        contrasts[node_id][c] = (v1 - v2) / variance

                def update_contrasts(node_id):
                    """update contrasts for a node."""
                    node = tree.node(node_id)
                    if node.succ:
                        if len(node.succ) == 2:
                            c1, c2 = node.succ
                            update_data(node_id, node.data.branchlength, c1,
                                        c2)
                        else:
                            # trifurcating root: combine the first two
                            # children, then join that result with the
                            # third child in the dummy node
                            assert (node_id == tree.root)
                            assert (len(node.succ) == 3)
                            update_data(node_id, node.data.branchlength,
                                        node.succ[0], node.succ[1])
                            update_data(max_index - 1, node.data.branchlength,
                                        node_id, node.succ[2])
                    else:
                        # leaf: take the values from the input table
                        for c in range(ncolumns):
                            values[node_id][c] = float(
                                data[map_node2data[node_id]][c + 1])

                        branchlengths[node_id] = node.data.branchlength

                tree.dfs(tree.root, post_function=update_contrasts)

                options.stdout.write("node_id\tvariance\t%s\n" %
                                     "\t".join(headers))
                for node_id in range(max_index):
                    # nodes without a variance have no contrast (leaves)
                    if variances[node_id] is None:
                        continue
                    options.stdout.write("%s\t%s\t%s\n" % (
                        node_id,
                        options.value_format % variances[node_id],
                        "\t".join(
                            options.value_format % value
                            for value in contrasts[node_id]),
                    ))

    E.Stop()

Beispiel #15

0

Datei anzeigen

Datei: data2phylocontrasts.py Projekt: siping/cgat

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        import rpy
                        from rpy import r as R
                        
                        ## Various ways to calculate r. It is not possible to use
                        ## cor.test or lsfit directly, as you have to perform a
                        ## regression through the origin.
                        
                        ## uncomment to check pearson r against phylip's value
                        ## r = calculateCorrelationCoefficient( columns[x], columns[y] )

                        ## for significance, use linear regression models in R
                        rpy.set_default_mode(rpy.NO_CONVERSION)
                        linear_model = R.lm(R("y ~ x - 1"), data = R.data_frame(x=columns[x], y=columns[y]))
                        rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        ## extract the p-value
                        p = ss['coefficients'][-1][-1]

                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""