Python summary Examples, rpy2.robjects.r.summary Python Examples

Example #1

0

Show file

File: linregress.py Project: daler/linregress

def ancova(lm1, lm2, names=('lm1', 'lm2')):
    """
    Test whether two single-variable linear models differ in slope and in
    intercept by running R's `aov` on their pooled observations.

    `lm1` and `lm2` must expose `x` and `y` attributes.  `names` supplies the
    factor level used to tag the observations coming from each model.

    Returns a tuple (pval of slope difference, pval of intercept difference).

    Keep in mind that when the slope difference is significant, the
    intercept comparison is not really interpretable.

    """
    # Recipe taken from:
    # http://r-eco-evo.blogspot.com/2011/08/
    #           comparing-two-regression-slopes-by.html
    #
    #   aov(y~x*factor): the interaction term is the 3rd line of the summary
    #                    table and tests the slope difference.
    #   aov(y~x+factor): the "factor" term is the 2nd line of the summary
    #                    table and tests the intercept difference.

    # Pool both models' observations and tag every point with its model name.
    group_tags = [names[0] for _ in lm1.x] + [names[1] for _ in lm2.x]
    labels = r.factor(np.array(group_tags))
    xi = np.concatenate((lm1.x, lm2.x))
    yi = np.concatenate((lm1.y, lm2.y))
    variables = (('xi', xi), ('yi', yi), ('labels', labels))

    # Model with the interaction term; the data is placed in the formula's
    # environment before the model is evaluated.
    with_interaction = robjects.Formula('yi~xi*labels')
    for key, value in variables:
        with_interaction.environment[key] = value
    result1 = r('aov(%s)' % with_interaction.r_repr())
    interaction_pval = r.summary(result1)[0].rx2('Pr(>F)')[2]

    # Same model without the interaction term.
    additive = robjects.Formula('yi~xi+labels')
    for key, value in variables:
        additive.environment[key] = value
    result2 = r('aov(%s)' % additive.r_repr())
    intercept_pval = r.summary(result2)[0].rx2('Pr(>F)')[1]

    # TODO: anova(result1, result2)?  (does dropping the interaction term
    # change the model fit?)

    return interaction_pval, intercept_pval

Example #2

0

Show file

File: handy_r.py Project: HuttleyLab/geneticdistance

def through_the_origin(x, y):
    """Fit ``y ~ 0 + x`` with R's ``lm`` and return selected summary values.

    The result is a dict with the first two entries of the summary's
    ``coefficients`` object (keys ``'coefficient'`` and ``'stderr'``) and
    the first entry of ``r.squared`` (key ``'r.squared'``).
    """
    frame = DataFrame({'x': FloatVector(x), 'y': FloatVector(y)})
    model = r.lm('y ~ 0 + x', frame)
    fit_summary = r.summary(model)
    coefficients = fit_summary.rx2('coefficients')
    return {
        'coefficient': coefficients[0],
        'stderr': coefficients[1],
        'r.squared': fit_summary.rx2('r.squared')[0],
    }

Example #3

0

Show file

File: proportions.py Project: gokceneraslan/sctoolkit

def dirichletreg_df(prop_df,
                    covar_df,
                    formula,
                    onevsrest_category=None,
                    return_reg_input=False):
    """Fit a Dirichlet regression with R's ``DirichletReg`` and tabulate it.

    Parameters
    ----------
    prop_df
        Frame holding the response columns; it is passed to ``DR_data`` and
        bound to ``y`` in the formula's environment.
    covar_df
        Frame of covariates; concatenated column-wise with ``prop_df`` to
        form the regression input.
    formula
        R formula text handed to ``rpy2.robjects.Formula``.
    onevsrest_category
        Optional column name of ``prop_df``.  When given, the model is fitted
        with ``model='alternative'`` and ``sub.comp`` set to the 1-based
        position of that column.
    return_reg_input
        When true, the concatenated input frame is returned as well.

    Returns
    -------
    ``coef_df``, or ``(dr_df, coef_df)`` when ``return_reg_input`` is true.
    ``coef_df`` has the columns ``coefficient``, ``se``, ``zval``, ``pval``,
    ``compartment``, ``variable`` and ``significance``, plus ``coef_type``
    when ``onevsrest_category`` is given.

    Raises
    ------
    AssertionError
        If ``onevsrest_category`` is given but is not a column of
        ``prop_df``.
    """
    import os

    from rpy2.robjects import r, Formula
    from rpy2.robjects.packages import importr
    from rpy2.rinterface_lib.callbacks import logger as rpy2_logger

    dr = importr('DirichletReg')
    dr_df = pd.concat([prop_df, covar_df], axis=1)

    f = Formula(formula)

    # Only show rpy2 errors (not warnings) while DR_data runs, then put the
    # logger back to whatever level it had, even if the call fails.
    previous_level = rpy2_logger.level
    rpy2_logger.setLevel(logging.ERROR)
    try:
        f.environment['y'] = dr.DR_data(py2r(prop_df))
    finally:
        rpy2_logger.setLevel(previous_level)

    if onevsrest_category is None:
        fit = dr.DirichReg(f, py2r(dr_df))
    else:
        assert onevsrest_category in prop_df.columns, (
            'onevsrest_category must be a column of prop_df')
        # +1 converts the 0-based Python position to R's 1-based indexing.
        cat_index = prop_df.columns.tolist().index(onevsrest_category) + 1
        fit = dr.DirichReg(f,
                           py2r(dr_df),
                           model='alternative',
                           **{'sub.comp': cat_index})

    # Divert R's output while summarising, and always undo the diversion so
    # that a failing summary() cannot leave R output redirected.
    r.sink(file=os.devnull)
    try:
        u = r.summary(fit)
    finally:
        r.sink()
        if r('sink.number')()[0] > 0:
            r.sink()

    if onevsrest_category is None:
        varnames = u.rx2('varnames')
    else:
        varnames = [onevsrest_category] * 2

    coef_mat = u.rx2('coef.mat')
    rows = r2py(r('rownames')(coef_mat))
    coef_df = r2py(r('as.data.frame')(coef_mat)).reset_index(drop=True)
    coef_df.columns = ['coefficient', 'se', 'zval', 'pval']

    # Each name in varnames is repeated according to the summary's n.vars.
    coef_df['compartment'] = np.repeat(varnames, r2py(u.rx2('n.vars')))
    coef_df['variable'] = rows
    coef_df['significance'] = bin_pval(coef_df.pval)

    if onevsrest_category is not None:
        coef_df['coef_type'] = np.repeat(['mean', 'precision'],
                                         r2py(u.rx2('n.vars')))

    if return_reg_input:
        return dr_df, coef_df
    else:
        return coef_df

Example #4

0

Show file

File: extract_armies.py Project: syhw/clusterize_RTS_data

def clusterize_r_em(*args):
    """ Clustering and plotting with EM GMM

    Every positional argument is handed to R's ``Mclust``.  For each one the
    model and its summary are printed, the fit is plotted on a Quartz
    device, and the function waits for a line of input before continuing.

    Exits the process with status -1 when rpy2 cannot be imported.
    """
    # Only a failed import means "rpy2 is missing"; any other error (and
    # Ctrl-C) must propagate instead of being reported as a missing package.
    try:
        from rpy2.robjects import r
        import rpy2.robjects.numpy2ri
    except ImportError:
        print("You need rpy2")
        sys.exit(-1)
    rpy2.robjects.numpy2ri.activate()

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    r.library("mclust")
    for arg in args:
        model = r.Mclust(arg)
        print(model)
        print(r.summary(model))
        # NOTE(review): quartz devices are macOS-specific.
        r.quartz("plot")
        r.plot(model, arg)
        print(read_line("any key to pass"))

Example #5

0

Show file

File: extract_armies.py Project: syhw/clusterize_RTS_data

def clusterize_r_em(*args):
    """ Clustering and plotting with EM GMM

    Every positional argument is handed to R's ``Mclust``.  For each one the
    model and its summary are printed, the fit is plotted on a Quartz
    device, and the function waits for a line of input before continuing.

    Exits the process with status -1 when rpy2 cannot be imported.
    """
    # Only a failed import means "rpy2 is missing"; any other error (and
    # Ctrl-C) must propagate instead of being reported as a missing package.
    try:
        from rpy2.robjects import r
        import rpy2.robjects.numpy2ri
    except ImportError:
        print("You need rpy2")
        sys.exit(-1)
    rpy2.robjects.numpy2ri.activate()

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    r.library("mclust")
    for arg in args:
        model = r.Mclust(arg)
        print(model)
        print(r.summary(model))
        # NOTE(review): quartz devices are macOS-specific.
        r.quartz("plot")
        r.plot(model, arg)
        print(read_line("any key to pass"))

Example #6

0

Show file

File: analysis.py Project: rajivnarayan/genomic-features-survival

    def km_plot_data(self, name, time, censor, values):
        """Build Kaplan-Meier curve points for samples split at the mean.

        Samples whose value is greater than or equal to the mean of
        ``values`` form the "high" group, the rest the "low" group.  The
        groups are compared with ``survdiff`` and their survival curves are
        computed with ``survfit``.

        ``name`` is accepted but not used by this method.

        Returns a dict with ``'high'`` and ``'low'`` (lists of
        ``{'percent': ..., 'time': ...}`` dicts produced from
        ``make_plottable_kms``) and ``'p'``, the p value rounded to four
        significant digits.
        """
        frame = pd.DataFrame(
            {'time': time, 'censor': censor, 'value': values}, dtype=float)
        frame['high'] = frame.value >= frame.value.mean()

        data = {
            'time': robjects.FloatVector(frame['time']),
            'censor': robjects.IntVector(frame['censor']),
            'high': robjects.IntVector(frame['high']),
        }
        df = robjects.DataFrame(data)

        # p value: upper tail of a chi-square (1 degree of freedom) at the
        # "chisq" entry of the survdiff result.
        formula_text = 'Surv(time, censor) ~ high'
        km_diff = self.surv.survdiff(robjects.Formula(formula_text), data=df)
        chisq_position = list(km_diff.names).index('chisq')
        pvalue = chi2.sf(km_diff[chisq_position][0], 1)

        km = self.surv.survfit(robjects.Formula(formula_text), data=df)
        # The converted summary is not read below; the call is kept so the
        # method performs exactly the same work as before.
        summary = pandas2ri.ri2py(r.summary(km, extend=True))

        # Evaluate the survival summary at every observed time inside R and
        # gather the selected summary components into one data frame.
        r.assign('km', km)
        r.assign('times', data['time'])
        r.assign('res', r('summary(km, times=times)'))
        r.assign('cols', r('lapply(c(2:6, 8:11), function(x) res[x])'))
        km_results = pd.DataFrame(r('do.call(data.frame, cols)'))

        low_km = km_results[km_results['strata'] == 'high=0']
        high_km = km_results[km_results['strata'] == 'high=1']

        high_time, high_percent = self.make_plottable_kms(
            high_km['time'], high_km['surv'])
        low_time, low_percent = self.make_plottable_kms(
            low_km['time'], low_km['surv'])

        high = [{'percent': percent, 'time': when}
                for percent, when in zip(high_percent, high_time)]
        low = [{'percent': percent, 'time': when}
               for percent, when in zip(low_percent, low_time)]

        return {'high': high, 'low': low, 'p': float('%.4g' % pvalue)}

Example #7

0

Show file

File: extract_armies.py Project: syhw/AnalyzeBWData

def clusterize_r_em(*args, **kwargs):
    """ Clustering and plotting with EM GMM

    Every positional argument is handed to R's ``Mclust``.  When the keyword
    ``clf_on_pca`` is true, each argument is first projected with a
    2-component PCA.  For each one the model and its summary are printed,
    the fit is plotted on a Quartz device, and the function waits for a line
    of input before continuing.

    Exits the process with status -1 when an import fails.
    """
    # Only a failed import is reported here; any other error (and Ctrl-C)
    # must propagate.  The message is also shown when scikit-learn is the
    # package that is missing.
    try:
        from rpy2.robjects import r
        import rpy2.robjects.numpy2ri
        from sklearn.decomposition import PCA
    except ImportError:
        print("You need rpy2")
        sys.exit(-1)
    rpy2.robjects.numpy2ri.activate()

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    use_pca = kwargs.get('clf_on_pca', False)

    r.library("mclust")
    for arg in args:
        if use_pca:
            pca = PCA(2)
            arg = pca.fit(arg).transform(arg)
        model = r.Mclust(arg)
        print(model)
        print(r.summary(model))
        # NOTE(review): quartz devices are macOS-specific.
        r.quartz("plot")
        r.plot(model, arg)
        print(read_line("press any key to pass"))

Example #8

0

Show file

File: firststage.py Project: PhilErickson/LawStructural

 def _r_tobit(self, data, xvars, rbar):
     """ Fit a tobit model of OverallRank on ``xvars`` using R's vglm.

     ``data`` is converted to an R data frame and assigned to the R
     variable ``data``; ``rbar`` is used as the upper limit and 1 as the
     lower limit.  The model summary is printed when
     ``self.opts['verbose']`` is set.

     Returns a dict with ``'beta'`` (first column of the coefficient
     matrix as a Series indexed by ``'const'`` followed by ``xvars``) and
     ``'sigma'`` (row 0, column 1 of that matrix).
     """
     r.assign('data', com.convert_to_r_dataframe(data))
     predictors = '+'.join(xvars)
     model = r("vglm(OverallRank ~ "+ predictors +", \
                       family=tobit(Upper=" + str(rbar) + ", Lower=1), \
                       data=data, crit='coeff')")
     if self.opts['verbose']:
         print(r.summary(model))
     coef_matrix = np.array(r.coef(model, matrix=True))
     # Label the rows: the constant first, then the regressors in order.
     row_labels = deepcopy(xvars)
     row_labels.insert(0, 'const')
     return {
         'beta': pd.Series(coef_matrix[:, 0], index=row_labels),
         'sigma': coef_matrix[0, 1],
     }

Example #9

0

Show file

File: policycomp.py Project: PhilErickson/LawStructural

def method_spline(rvar, train, test):
    """ Fit an lm of ``rvar`` on B-splines of OverallRank with interaction.

    The formula uses a spline of OverallRank, ``treat`` and their
    interaction, without intercept; a ``year`` term is appended when
    ``rvar`` is ``'Tuition'``.  The R-squared of the fit is printed, then
    ``analytics`` is called on the training predictions and, unless ``rvar``
    is ``"UndergraduatemedianGPA"``, on the predictions for ``test``.
    """
    print("Splines")
    year_term = ' + year' if rvar == 'Tuition' else ''
    formula = (rvar
               + ' ~ bs(OverallRank, df=6) + treat + '
               + 'treat:bs(OverallRank, df=6) - 1'
               + year_term)
    model = r.lm(formula, data=train)
    fit_summary = r.summary(model)
    print(fit_summary.rx2('r.squared'))
    analytics(rvar, 'Training', train[rvar], np.array(r.predict(model)))
    skip_testing = rvar == "UndergraduatemedianGPA"
    if not skip_testing:
        analytics(rvar, 'Testing', test[rvar],
                  np.array(r.predict(model, newdata=test)))
    print()

Example #10

0

Show file

def fit(data, outpath=None, verbosity=0, **kwargs):
    """Estimate a multilevel model by calling R's ``stan_glmer``.

    ``data`` is converted to an R object and passed as ``data``;
    ``na.action`` is set to ``'na.omit'``; every other keyword argument is
    forwarded unchanged.

    ``verbosity`` controls reporting: any truthy value prints the model and
    classification diagnostics (predictions are fitted values above 0.5),
    values above 1 also print R's summary, and values above 2 also call
    ``inspect``.  When ``outpath`` is given the model is saved there as
    ``mlm_<outcome>.rds``.

    Returns the fitted model.

    Todos:

        TODO: before converting data to r_data, filter out columns not
            appearing in formula.
    """
    kwargs['data'] = pandas2ri.py2ri(data)
    kwargs['na.action'] = 'na.omit'
    model = r.stan_glmer(**kwargs)  # TODO: select appropriate prior.

    if verbosity:
        print(model)
        probs = np.array(model.rx2('fitted.values'))
        preds = (probs > 0.5).astype(int)
        y = np.array(model.rx2('y'))
        print('Number of observations: {0}'.format(y.shape[0]))
        print('Distribution of y:\n{0}'.format(np.bincount(y)))
        print('Classification report:')
        print(classification_report(y, preds))
        print('R2 Score:\n', r2_score(y, probs))
        print('Accuracy:\n', accuracy_score(y, preds))

    if verbosity > 1:
        print(r.summary(model))

    if verbosity > 2:
        inspect(model)

    if outpath is not None:
        # The outcome name (first term of the model formula) names the file.
        outcome, _, _, _ = get_terms(model.rx2('formula')[0])
        target = os.path.join(outpath, 'mlm_{0}.rds'.format(outcome))
        save(model, target)

    return model

Example #11

0

Show file

File: R_tscount.py Project: linneuholanda/InfoDenguePredict


def plot_forecast(data, fcast):
    """Plot the observed series together with a forecast and its band.

    ``data`` must have an index and a ``casos_est`` column.  Elements 3, 4
    and 5 of ``fcast`` are plotted as the forecast line and as the lower and
    upper edges of the shaded band.  Forecast points are placed on weekly
    dates following the last index value of ``data``.
    """
    point, lower, upper = fcast[3], fcast[4], fcast[5]

    # One extra period is generated and dropped so the forecast dates begin
    # after the first generated date rather than on it.
    future_dates = pd.date_range(
        start=data.index.max(), periods=len(point) + 1, freq='W')[1:]

    forecast = pd.Series(point, index=future_dates)
    lowerpi = pd.Series(lower, index=future_dates)
    upperpi = pd.Series(upper, index=future_dates)

    plt.plot(data.index, data.casos_est, color='b', alpha=0.5)
    plt.plot(forecast.index, forecast.values, color='red')
    plt.fill_between(
        forecast.index, lowerpi.values, upperpi.values,
        alpha=0.2, color='red')


if __name__ == "__main__":
    # Script entry point: load one alert table, fit a model with
    # build_model, then print R's summary of the model and plot it.
    data = get_alerta_table(3304557)  # Nova Iguaçu: 3303609
    # Load the R package "tscount" and look up its tsglm function.
    # NOTE(review): neither name is used in the visible code below;
    # presumably build_model reads them as module globals - confirm before
    # removing.
    tscount = importr('tscount')
    tsglm = r('tsglm')

    model = build_model(data)
    print(r.summary(model))
    r.plot(model)
    # Disabled forecasting steps, kept for reference:
    # fcast = forecast.forecast(model, h=5, level=95.0)
    # print(fcast[3], fcast[4], fcast[5])
    # plot_forecast(data=data, fcast=fcast)
    # plt.show()

Example #12

0

Show file

File: test_var.py Project: EmlynC/pandas

 def data_summary(self):
     """Print R's ``summary`` of ``self.rdata``."""
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid syntax on Python 3.
     print(r.summary(self.rdata))

Example #13

0

Show file

File: test_var.py Project: EmlynC/pandas

 def summary(self, equation=None):
     """Print R's ``summary`` of ``self._estimate``.

     ``equation`` is forwarded to R's ``summary`` as a keyword argument.
     """
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid syntax on Python 3.
     print(r.summary(self._estimate, equation=equation))

Example #14

0

Show file

File: linear_model.py Project: ladjanszki/PLAYGROUND

import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects import r as R
import pandas as pd

# Turn on the pandas <-> R conversion provided by rpy2.
pandas2ri.activate()

# Small three-column frame used as input for the regression.
data = {
    'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    'b': [11, 12, 13, 14, 15, 16, 17, 18, 19],
    'c': [21, 22, 23, 24, 25, 26, 26, 28, 29],
}
test = pd.DataFrame(data)
print(test.head())

# Regress column a on column b with R's lm, then show the coefficient
# table taken from the model summary.
M = R.lm('a ~ b', data=test)
model_summary = R.summary(M)
print(model_summary.rx2('coefficients'))

Example #15

0

Show file

File: linregress.py Project: daler/linregress

    def __init__(self, formula, **kwargs):
        """
        Fit a linear regression with R's `lm()` and keep its summary.

        Every keyword argument is converted to a NumPy array, stored in R's
        global environment under the keyword's name, and also set as an
        attribute of this object.  The formula can therefore refer to the
        data by those names: passing `x=[1,2,3,4]` makes `x` available to
        the formula and as `self.x`.

        `formula` is a string handed to R's `lm()` as-is.

        Example usage::

            >>> x = [1, 2, 3, 4]
            >>> y = [1.2, 3, 7, 10]
            >>> m = LinearRegression(x=x, y=y, formula='y~x')
            >>> m.slope
            3.0399999999999996

            >>> m.intercept
            -2.299999999999998

            >>> m.adj_r_squared
            0.97221750212404412

            >>> m.slope_pval(0)
            0.0093041159117684229

            >>> m.intercept_pval(0)
            0.10459053583417365

            >>> # Variables accessible as NumPy arrays
            >>> m.x
            array([1, 2, 3, 4])

        Cross-check with scipy.stats.linregress::

            >>> from scipy.stats import linregress as scipy_linregress
            >>> results = scipy_linregress(x, y)
            >>> eps = 1e-15
            >>> assert abs(results[0] - m.slope) < eps
            >>> eps = 1e-10
            >>> assert abs(results[1] - m.intercept) < eps
            >>> eps = 1e-15
            >>> assert abs(results[2] ** 2 - m.r_squared) < eps
            >>> eps = 1e-15
            >>> assert abs(results[3] - m.slope_pval(0)) < eps


        TODO:
            - support for more complex models (requires examining the coeffs
              matrix to see what's included)

        """

        # Publish each dataset to R and mirror it on the instance.
        for var_name, raw_values in kwargs.items():
            as_array = np.array(raw_values)
            robjects.globalenv[var_name] = as_array
            setattr(self, var_name, as_array)

        self.lm = r.lm(formula)
        self.summary = r.summary(self.lm)

        # Flat positions 6 and 7 of the coefficient table are read as the
        # intercept and slope p values.
        # NOTE(review): this presumes a model with an intercept and a single
        # predictor (2x4 table stored column by column) - confirm.
        coef_table = self.summary.rx2('coefficients')
        intercept_p, slope_p = coef_table[6], coef_table[7]
        self._intercept_p = intercept_p
        self._slope_p = slope_p

Example #16

0

Show file

 def summary(self, equation=None):
     """Print R's ``summary`` of ``self._estimate``.

     ``equation`` is forwarded to R's ``summary`` as a keyword argument.
     """
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid syntax on Python 3.
     print(r.summary(self._estimate, equation=equation))

Example #17

0

Show file

 def data_summary(self):
     """Print R's ``summary`` of ``self.rdata``."""
     # A parenthesised single-argument print behaves the same on Python 2
     # and is also valid syntax on Python 3.
     print(r.summary(self.rdata))

Example #18

0

Show file

    path = "/install/git/Bioinformatics_paper/胶质母细胞瘤微环境预后相关基因的TCGA数据库挖掘/"
    r.setwd(path)
    # 读取处理好的数据
    sample = pd.read_csv(f"{path}sample.txt", sep="\t", index_col=0)
    sample_Group = sample["Stromal_Group"]
    # 读取处理好的基因表达数据
    HT_HG_U133A_sample = pd.read_csv(f"{path}HT_HG_U133A_sample.txt",
                                     sep="\t").dropna()

    ################# 方差分析(ANOVA) GeneExp_Subtype #################
    # https://www.bioinfo-scrounger.com/archives/588/
    with localconverter(ro.default_converter + pandas2ri.converter):
        ANOVA_data_R = ro.conversion.py2rpy(
            sample[["Stromal_score", "GeneExp_Subtype"]])
        print(
            r.summary(
                r.aov(r("Stromal_score~GeneExp_Subtype"), data=ANOVA_data_R)))

    ################# t检验 IDH1 #################
    r('''suppressMessages(library(MASS))''')
    with localconverter(ro.default_converter + pandas2ri.converter):
        Ttest_data_R = ro.conversion.py2rpy(
            sample[["Stromal_score", "IDH1"]].query("IDH1==1 or IDH1==0"))
        print(r["t.test"](r("Stromal_score~IDH1"), data=Ttest_data_R))

    ################# 生存分析 #################
    # https://www.jianshu.com/p/4ad9ba730719
    # r('''suppressMessages(library(survival))''')
    importr("survival")
    importr("ggfortify")
    with localconverter(ro.default_converter + pandas2ri.converter):
        # 构建生存对象