Esempio n. 1
0
def anova_shape_r_nonoptimal(model, sdata):
    """Compare a full and a reduced R linear model for every phenotype column.

    Each column of ``sdata.phenotype_array`` is used in turn as the
    ``response`` of two ``lm`` fits (``model.fullmodel`` and
    ``model.nullmodel``); the two fits are then compared with R's ``anova``.

    Args:
        model: object providing ``fullmodel`` and ``nullmodel`` (right-hand
            sides of the R formulas) and ``unique`` (name of the full-model
            coefficient whose sign and value are reported).
        sdata: object providing ``phenotype_array`` (indexed as
            ``[:, column]``) and ``create_r_pre_data_frame(model)``.

    Returns:
        The ``StatsOutput`` whose ``pvalues``, ``pvalues_signed`` and
        ``tvalues`` entries were filled for every column.
    """
    pre_data_frame = sdata.create_r_pre_data_frame(model)
    num_columns = sdata.phenotype_array.shape[1]
    statsout = StatsOutput(dim=num_columns)

    # Everything below is loop-invariant: resolve the R functions and build
    # the two formulas once instead of once per column.
    r_lm = robjects.r.lm
    r_anova = robjects.r.anova
    full_formula = robjects.Formula('response ~ ' + model.fullmodel)
    reduced_formula = robjects.Formula('response ~ ' + model.nullmodel)

    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    for i in range(num_columns):
        pre_data_frame['response'] = robjects.FloatVector(
            sdata.phenotype_array[:, i])
        dataframe = robjects.DataFrame(pre_data_frame)

        fit_full = r_lm(full_formula, data=dataframe)
        fit_reduced = r_lm(reduced_formula, data=dataframe)
        model_diff = r_anova(fit_full, fit_reduced)

        coefficients = fit_full.rx2('coefficients')
        coefficient = coefficients[coefficients.names.index(model.unique)]
        # Second row of the 'Pr(>F)' column of the anova table.
        p_value = model_diff[model_diff.names.index('Pr(>F)')][1]

        statsout.pvalues[i] = p_value
        statsout.pvalues_signed[i] = np.sign(coefficient) * p_value
        # NOTE(review): this stores the coefficient estimate itself, not a
        # t statistic, despite the attribute name -- confirm with callers.
        statsout.tvalues[i] = coefficient
    return statsout
Esempio n. 2
0
 def fit(self, log10=None):
     """Fit a penalized-spline GAM (``mgcv.gam``) of ``self.y`` on ``self.x``.

     The result is stored in ``self.spline``; nothing is returned.

     Args:
         log10: iterable of axis names (``"x"`` and/or ``"y"``, any case)
             whose data are log10-transformed before fitting.  Defaults to
             no transformation.  The upper-cased names are kept in
             ``self.log10``.
     """
     # ``None`` replaces the former mutable ``[]`` default.
     requested = () if log10 is None else log10
     self.log10 = list(map(str.upper, requested))

     x_values = np.log10(self.x) if "X" in self.log10 else self.x
     y_values = np.log10(self.y) if "Y" in self.log10 else self.y
     x = ro.FloatVector(x_values)
     y = ro.FloatVector(y_values)
     weights = ro.FloatVector(self.weights)
     data = ro.DataFrame({"x": x, "y": y})

     try:
         self.spline = self.mgcv.gam(
             ro.Formula("y ~ s(x, bs='ps')"),
             data=data,
             weights=weights, method="REML")
     except RRuntimeError:
         # NOTE:
         # If the input y data is constant (e.g., the temperature profile
         # has only ONE data point), then the smoothing spline fit above
         # using method "REML" fails with error:
         #     Error in gam.reparam(UrS, sp, grderiv) :
         #       NA/NaN/Inf in foreign function call (arg 3)
         # or with error:
         #     Error in eigen(hess1, symmetric = TRUE) :
         #       infinite or missing values in 'x'
         print("WARNING: 'mgcv.gam()' using method 'REML' failed!",
               file=sys.stderr)
         # Retry with a fixed smoothing parameter and the default method.
         self.spline = self.mgcv.gam(
             ro.Formula("y ~ s(x, bs='ps', sp=0.6)"),
             data=data,
             weights=weights)
    def km_plot_data(self, name, time, censor, values):
        """Build plottable survival curves for samples split at the mean value.

        Samples with ``value >= mean(values)`` form the "high" group, the
        rest the "low" group.  ``survdiff`` provides a chi-square statistic
        that is turned into a p-value with one degree of freedom, and
        ``survfit`` provides the curves.

        Args:
            name: unused by this method.
            time: per-sample times.
            censor: per-sample event/censoring indicators.
            values: per-sample values used for the high/low split.

        Returns:
            dict with ``'high'`` and ``'low'`` (lists of
            ``{'percent': ..., 'time': ...}`` points produced by
            ``self.make_plottable_kms``) and ``'p'`` (the p-value rounded to
            four significant digits).
        """
        values_df = pd.DataFrame(
            {
                'time': time,
                'censor': censor,
                'value': values
            }, dtype=float)
        values_df['high'] = values_df.value >= values_df.value.mean()

        data = {
            'time': robjects.FloatVector(values_df['time']),
            'censor': robjects.IntVector(values_df['censor']),
            'high': robjects.IntVector(values_df['high'])
        }
        df = robjects.DataFrame(data)
        formula = robjects.Formula('Surv(time, censor) ~ high')

        # p value
        km_diff = self.surv.survdiff(formula, data=df)
        chisq_ind = list(km_diff.names).index('chisq')
        pvalue = chi2.sf(km_diff[chisq_ind][0], 1)

        km = self.surv.survfit(formula, data=df)
        # The former ``summary = pandas2ri.ri2py(r.summary(km, extend=True))``
        # was never used, so that extra R call has been dropped.

        # Evaluate the fit at every observed time inside R and collect the
        # selected summary components into one data frame.
        r.assign('km', km)
        r.assign('times', data['time'])
        r.assign('res', r('summary(km, times=times)'))
        cols = r('lapply(c(2:6, 8:11), function(x) res[x])')
        r.assign('cols', cols)
        km_results = pd.DataFrame(r('do.call(data.frame, cols)'))

        def curve(stratum):
            """Return the plottable points of one stratum of the fit."""
            rows = km_results[km_results['strata'] == stratum]
            times, percents = self.make_plottable_kms(
                rows['time'], rows['surv'])
            return [{
                'percent': percent,
                'time': when
            } for percent, when in zip(percents, times)]

        high = curve('high=1')
        low = curve('high=0')

        return {'high': high, 'low': low, 'p': float('%.4g' % pvalue)}
Esempio n. 4
0
def get_surv_fit(surv,
                 feature=None,
                 covariates=None,
                 interactions=None,
                 formula=None,
                 time_cutoff=5):
    """Tabulate an R ``survfit`` summary as a pandas DataFrame.

    Args:
        surv, feature, covariates: forwarded to ``process_covariates`` to
            build the R data frame (and the factor list used for the
            default formula).
        interactions: forwarded to ``get_formula`` when ``formula`` is None.
        formula: explicit R formula string; built with ``get_formula`` when
            omitted.
        time_cutoff: time at which the survival estimate and its bounds are
            read from the summary; also used in the column label.

    Returns:
        DataFrame indexed by group label with two-level columns:
        ``Stats`` (# Patients, # Events), ``Median Survival`` (Median,
        Lower, Upper) and ``'<time_cutoff>y Survival'`` (Surv, Lower,
        Upper).  Labels that all parse as integers become an integer index.
    """
    df, factors = process_covariates(surv, feature, covariates)
    if formula is None:
        formula = get_formula(factors, interactions)
    fmla = robjects.Formula(formula)

    s = survival.survfit(fmla, df)
    summary = base.summary(s, times=robjects.r.c(time_cutoff))
    res = convert_robj(summary.rx2('table'))

    if isinstance(res, list):
        # The conversion gave back a plain list (presumably the single-group
        # case): rebuild a one-row table labelled 'feature=all'.
        table = summary.rx2('table')
        res = pd.DataFrame({'feature=all': pd.Series(table, table.names)}).T

    # Row labels look like '<name>=<level>'; keep only the level.
    res = res.rename(index=lambda label: label.split('=')[1])
    res = res[['records', 'events', 'median', '0.95LCL', '0.95UCL']]
    res.columns = pd.MultiIndex.from_tuples([('Stats', '# Patients'),
                                             ('Stats', '# Events'),
                                             ('Median Survival', 'Median'),
                                             ('Median Survival', 'Lower'),
                                             ('Median Survival', 'Upper')])

    survival_label = str(time_cutoff) + 'y Survival'
    if feature is None:
        for f in ['surv', 'lower', 'upper']:
            res[(survival_label, f.capitalize())] = summary.rx2(f)
    else:
        # A concrete list (not a lazy ``map``) so it is a valid pandas index
        # on Python 3 as well.
        strata_labels = [
            label.replace('feature=', '')
            for label in summary.rx2('strata').iter_labels()
        ]
        by_stratum = pd.DataFrame(
            {
                d: list(summary.rx2(d))
                for d in ['strata', 'surv', 'lower', 'upper']
            },
            index=strata_labels)
        for f in ['surv', 'lower', 'upper']:
            res[(survival_label, f.capitalize())] = by_stratum[f]

    try:
        res.index = [int(label) for label in res.index]
    except (TypeError, ValueError):
        # Non-numeric group labels: keep the string index.
        pass
    return res
Esempio n. 5
0
    def fit(self, X, y):
        """Fit a ``cv_glmnet`` model through an R formula and return ``self``.

        A pandas DataFrame ``X`` has its columns sanitized and the target
        appended in place as ``'target__'``; any other ``X`` gets ``y``
        prepended as its first column, which is then addressed as ``'X0'``.
        The fitted R model is stored in ``self.model``.
        """
        if type(X) is pd.DataFrame:
            target_column_name = 'target__'
            X.columns = sanitize_column_names(list(X))
            X[target_column_name] = y
        else:
            target_column_name = 'X0'
            X = np.concatenate((y.reshape((-1, 1)), X), axis=1)

        X_r, X = fix_types(X)
        # Every column except the target is used as a predictor.
        predictors = list(set(list(X)).difference([target_column_name]))
        formula = robj.Formula(
            additive([target_column_name], None, predictors))

        if self._categorical_target():
            target_position = X_r.colnames.index(target_column_name)
            X_r[target_position] = robj.FactorVector(
                X_r.rx2(target_column_name))

        # Translate Python-style keyword names to the dotted R argument names.
        glmnet_cv = STM(self._utils.cv_glmnet,
                        init_prm_translate={
                            'use_model_frame': 'use.model.frame',
                            'nan_action': 'na.action'
                        })
        self.model = glmnet_cv(formula,
                               data=X_r,
                               alpha=self.alpha,
                               family=self.family,
                               nan_action=NAN_ACTIONS_TO_R[self.nan_action],
                               intercept=self.fit_intercept,
                               thresh=self.epsilon,
                               maxit=self.max_iter)
        self.model.rclass = robj.StrVector(('cv.glmnet.formula', 'cv.glmnet'))

        return self
Esempio n. 6
0
    def fit_vgam_v1P_helper(dataset, PTO_coords, g, df):
        """Fit a negative-binomial VGAM smooth and run a likelihood-ratio test.

        Args:
            dataset: pandas object holding the response, indexed by the same
                labels as ``PTO_coords``.
            PTO_coords: pandas Series with the predictor values.
            g: unused by this helper.
            df: degrees of freedom inserted into the ``s(X, df = ...)`` term.

        Returns:
            ``(predicted, stat_a, stat_b)``: the fitted response on the grid
            ``0, 1, ..., max(PTO_coords) - 1`` as a list, followed by entries
            ``[3][1]`` and ``[4][1]`` of the ``Body`` slot of the ``lrtest``
            result.  ``(nan, nan, nan)`` is returned when anything fails.
        """
        try:
            # define values
            predict_x = np.arange(0, np.max(PTO_coords), 1)

            X = robjects.FloatVector(list(PTO_coords.values))
            # ``.ix`` was removed from pandas; combined with the former bare
            # ``except`` that turned every call into a silent all-NaN result.
            # ``.loc`` is the label-based equivalent.
            Y = robjects.FloatVector(list(dataset.loc[PTO_coords.index]))
            DF = robjects.DataFrame({'X': X, 'Y': Y})

            # fit full model
            smooth_term = "s(X, df = %s)" % df
            fmla_full = robjects.Formula("Y ~ " + smooth_term)
            fit_full = rvgam.vgam(fmla_full, rvgam.negbinomial, data=DF)

            # predict values
            X_pred = robjects.FloatVector(predict_x)
            DF_pred = robjects.DataFrame({'X': X_pred})
            predicted = rvgam.predict(fit_full, newdata=DF_pred,
                                      type='response')

            # perform LR test (the smooth term is the one being tested)
            lr_body = rvgam.lrtest(fit_full, smooth_term).do_slot('Body')

            return list(predicted), lr_body[3][1], lr_body[4][1]

        except Exception:
            # Best effort: a failed fit is reported as missing values.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate.
            return np.nan, np.nan, np.nan
Esempio n. 7
0
def get_cox_ph(surv,
               feature=None,
               covariates=None,
               formula=None,
               interactions=True,
               get_model=True,
               print_desc=False):
    '''
    Fit a Cox proportional hazards model to the data.
    ---------------------------------------------------
    surv, feature, covariates: forwarded to cox_model_selection (when no
                formula is given) or to process_covariates (when one is)
    formula: explicit R formula string; when None the model is chosen by
                cox_model_selection
    interactions: forwarded to cox_model_selection
    get_model: return the fitted model (otherwise None is returned)
    print_desc: print the last two blank-line separated sections of the
                model's text representation
    '''
    if formula is None:
        s = cox_model_selection(surv, feature, covariates, interactions)
    else:
        df, _ = process_covariates(surv, feature, covariates)
        s = survival.coxph(robjects.Formula(formula), df)

    if print_desc:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print('\n\n'.join(str(s).split('\n\n')[-2:]))

    if get_model:
        return s
    return None
Esempio n. 8
0
    def learnModel(self, X, Y):
        """Train on examples ``X`` with labels ``Y`` via ``class ~ .``.

        Both inputs are validated as numpy arrays, ``Y`` must contain at
        least two distinct values, and a 1-D ``Y`` is turned into a column.
        After training, Python and R garbage collection are triggered and,
        if ``self.printMemStats`` is set, memory statistics are logged.

        Raises:
            ValueError: when ``Y`` holds fewer than two distinct labels.
        """
        for array in (X, Y):
            Parameter.checkClass(array, numpy.ndarray)
        for array in (X, Y):
            Parameter.checkArray(array)

        distinct_labels = numpy.unique(Y)
        if distinct_labels.shape[0] < 2:
            raise ValueError(
                "Vector of labels must be binary, currently numpy.unique(Y) = "
                + str(distinct_labels))

        # A flat label vector becomes a single column.
        if Y.ndim == 1:
            Y = numpy.array([Y]).T

        frame = self._getDataFrame(X, Y)
        self.learnModelDataFrame(robjects.Formula('class ~ .'), frame)

        # Release memory on both sides of the bridge.
        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if not self.printMemStats:
            return
        logging.debug(self.getLsos()())
        logging.debug(ProfileUtils.memDisplay(locals()))
Esempio n. 9
0
def feglm(
    fml: str,
    data: pd.DataFrame,
    se: Optional[str] = None,
    **kwargs,
) -> FixestResult:
    """Wrapper for calling fixest::feglm in R.

    Only the columns named in the formula (and in the ``cluster`` keyword,
    if given) are passed to R, with rows containing missing values in any
    of them dropped.

    Args:
        fml: fixest formula string.
        data: source data frame.
        se: standard-error type; defaults to ``"cluster"`` when a
            ``cluster`` keyword is supplied and to ``"hetero"`` otherwise.
        **kwargs: forwarded unchanged to ``fixest::feglm``.

    Returns:
        The fit wrapped in a ``FixestResult``.
    """
    if se is None:
        se = "cluster" if "cluster" in kwargs else "hetero"

    token_pattern = r"[\w']+"

    # ``dict.fromkeys`` de-duplicates while keeping first-appearance order,
    # so the selected columns are the same from run to run (a ``set`` made
    # the order depend on string hashing).
    columns = [
        token
        for token in dict.fromkeys(re.findall(token_pattern, fml))
        if token != "1"
    ]

    if "cluster" in kwargs:
        # Do not select a cluster column a second time when the formula
        # already uses it: duplicated column labels are ambiguous.
        for token in re.findall(token_pattern, kwargs["cluster"]):
            if token not in columns:
                columns.append(token)

    result = fixest.feglm(  # pylint: disable=no-member
        robjects.Formula(fml),
        data=data[columns].dropna(subset=columns),
        se=se,
        **kwargs,
    )

    return FixestResult(result, se=se)
Esempio n. 10
0
	def fit(self, X, y, sample_weight=None, **kwargs):
		"""Fit an R ``glmtree`` model and return ``self``.

		``self.reg_columns`` go into the regressor part of the formula and
		every remaining non-target column into the other part handed to
		``additive``.  ``self.factor_columns`` is replaced by its sanitized
		form.  Missing ``sample_weight`` means a weight of 1 per row of ``X``.
		``**kwargs`` is accepted but not used here.
		"""
		if self.random_state is not None:
			r('set.seed(' + str(self.random_state) + ')')

		configured_reg = self.reg_columns
		reg_columns = [] if configured_reg is None else sanitize_column_names(configured_reg)
		configured_factors = self.factor_columns
		factor_columns = [] if configured_factors is None else configured_factors

		if sample_weight is None:
			sample_weight = [1]*X.shape[0]

		if type(X) is pd.DataFrame:
			target_column_name = 'target__'
			X.columns = sanitize_column_names(list(X))
			X[target_column_name] = y
		else:
			# Array input: the target becomes the first column, named 'X0'.
			target_column_name = 'X0'
			X = np.concatenate((y.reshape((-1, 1)), X), axis=1)

		self.factor_columns = sanitize_column_names(factor_columns)
		X_r, X = fix_types(X, factor_columns=self.factor_columns)

		remaining_columns = list(set(list(X)) - set([target_column_name]) - set(reg_columns))
		formula = robj.Formula(additive([target_column_name], reg_columns, remaining_columns))

		self.model = r.glmtree(formula, data=X_r, family=self.family, weights=np.array(sample_weight))
		return self
Esempio n. 11
0
def run_lme(signal, mask):
    """Fit ``signal ~ group + visit + (1|subject)`` on the masked samples.

    Samples with ``mask > 0.5`` are kept.  The module-level ``subject``,
    ``visit`` and ``group`` R vectors are subset with the same mask.

    Returns:
        numpy array of length 8: ``[0]`` number of kept samples, ``[1]`` the
        third entry of the ``df`` column, ``[2:5]`` the ``Estimate`` column,
        ``[5:8]`` the ``t value`` column of the model summary.  Entries the
        fit did not fill stay 0 (too few samples, or an R runtime error).
    """
    effects = ro.Formula('signal ~ group + visit +  (1|subject)')
    env = effects.environment

    good_voxels = np.sum(mask > 0.5)
    rm = ro.BoolVector(mask > 0.5)
    env["mask"] = rm
    env["signal"] = ro.FloatVector(signal).rx(rm)
    # covariates, restricted to the same samples
    env["subject"] = subject.rx(rm)
    env["visit"] = visit.rx(rm)
    env["group"] = group.rx(rm)

    # output buffer
    result = np.zeros(8)
    result[0] = good_voxels

    if good_voxels <= 4:
        # not enough information
        return result

    try:
        # linear mixed-effect model
        m = base.summary(lme.lmer(effects))
        # DF (for the visit)
        result[1] = m.rx2('coefficients').rx(True, 'df')[2]
        # coefficients
        result[2:5] = m.rx2('coefficients').rx(True, 'Estimate')[:]
        # t-values
        result[5:8] = m.rx2('coefficients').rx(True, 't value')[:]
    except RRuntimeError:
        # probably the model did not converge
        pass

    return result
Esempio n. 12
0
    def call_DESeq2(self, count_data, samples, conditions):
        """Run DESeq2 with the design ``~ condition``.

        @count_data is a DataFrame whose values form the count matrix.
        @samples and @conditions are equally long sequences; only the
        conditions end up in the column data handed to DESeq2.
        The reported contrast is condition "c" against "base".
        Returns the DESeq2 results converted to a DataFrame.
        """
        import rpy2.robjects as robjects
        import rpy2.robjects.numpy2ri as numpy2ri
        import mbf_r

        counts = np.array(count_data.values)
        n_rows, n_cols = counts.shape
        # Hand R a flat vector and let it rebuild the matrix row by row.
        flat_counts = counts.reshape(counts.size)
        r_counts = robjects.r.matrix(
            numpy2ri.py2rpy(flat_counts), nrow=n_rows, ncol=n_cols, byrow=True
        )

        sample_table = pd.DataFrame({"sample": samples, "condition": conditions})
        sample_table = sample_table.set_index("sample")
        design = "~ condition"
        sample_table = sample_table.reset_index(drop=True)
        r_col_data = mbf_r.convert_dataframe_to_r(
            pd.DataFrame(sample_table.to_dict("list"))
        )

        experiment = robjects.r("DESeqDataSetFromMatrix")(
            countData=r_counts, colData=r_col_data, design=robjects.Formula(design)
        )
        experiment = robjects.r("DESeq")(experiment)
        contrast = robjects.r("c")("condition", "c", "base")
        res = robjects.r("results")(experiment, contrast=contrast)
        return mbf_r.convert_dataframe_from_r(robjects.r("as.data.frame")(res))
Esempio n. 13
0
def stratified_regression(target, feature, strata):
    """Test ``feature`` against ``target`` while accounting for ``strata``.

    Two linear models are fit, ``target ~ feature + strata(strata)`` and the
    null model ``target ~ strata(strata)``, and compared with R's ``anova``.

    Args:
        target, feature, strata: named series; each is passed through
            ``sanitize_for_r`` and its ``.name`` is used in the formulas.

    Returns:
        pandas Series with ``'F'`` and ``'p'``, read from the second row of
        columns 4 and 5 (0-based) of the anova table.
    """
    target, feature, strata = map(sanitize_for_r, [target, feature, strata])
    df_r = process_factors([strata, target, feature])

    full_formula = robjects.Formula('{} ~ {} + strata({})'.format(
        target.name, feature.name, strata.name))
    fit = lm(full_formula, df_r)

    null_formula = robjects.Formula('{} ~ strata({})'.format(
        target.name, strata.name))
    fit_null = lm(null_formula, df_r)

    # Run the comparison once and read both statistics from the same table
    # (it used to be computed twice).
    comparison = robjects.r.anova(fit_null, fit)
    f_stat = comparison[4][1]
    p = comparison[5][1]

    return pd.Series({'F': f_stat, 'p': p})
Esempio n. 14
0
def create_atlas(expressionDatafile, sampleDatafile):
    """Create atlas expression matrix from input files.

    expressionDatafile is the full path to the csv file (readable into a
    DataFrame by the pandas.read_csv function) that contains all the datasets
    concatenated columnwise. sampleDatafile is the full path to the csv file
    that contains all sample information. This table must contain a column
    named 'Platform_Category', which is the variable used to filter out
    genes with high variance.
    Note that in the expression matrix, RNASeq data should have zeros as
    zeros, not nans.

    Returns the transformed expression data restricted to the genes whose
    variance-partition value is at most 0.2.
    """
    # Using rpy2 package to plug in R. Also need variancePartition R package installed in this environment.
    import rpy2
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    import rpy2.robjects.numpy2ri, rpy2.robjects.pandas2ri
    # Turn on the automatic numpy/pandas <-> R conversions.
    rpy2.robjects.numpy2ri.activate()
    rpy2.robjects.pandas2ri.activate()
    variancePartition = importr('variancePartition')

    # Read in expression data and sample metadata.
    data        = pandas.read_csv(expressionDatafile)
    metadata    = pandas.read_csv(sampleDatafile)
    data.dropna(how='any', inplace=True) # Drop genes that are not measurable in every dataset due to probes being absent

    # Search for platform dependent genes
    form              = robjects.Formula('~ Platform_Category')
    varPart           = variancePartition.fitExtractVarPartModel(transform_to_percentile(data), form, metadata[['Platform_Category']])

    sel_varPart       = numpy.array(varPart)[0] <= 0.2 # This is the filtering step
    genes_to_keep     = data.index.values[sel_varPart] # genes_to_keep is an array holding all the genes that pass the filter

    # NOTE(review): `aransform` is not defined in the visible source; it may
    # be a typo for the transform used above (`transform_to_percentile`) --
    # confirm before relying on this function.
    filtered_data     = aransform(data.loc[genes_to_keep].copy())
    return filtered_data
def do_km(name, time, censor, split, outdir):
    """Plot a ``survfit`` of the data, split into two groups, to a PNG file.

    Given three clean (pre-processed) lists, the plot is written to
    ``<outdir>/<name>_km.png`` (512x512 pixels).

    Args:
        name: base name of the output file.
        time: per-sample times (converted to integers).
        censor: per-sample event/censoring indicators (integers).
        split: per-sample group indicator (integers); the legend labels the
            two groups '<= Mean' and '> Mean'.
        outdir: directory the PNG is written to.
    """
    data = {
        'time': robjects.IntVector(np.array(time)),
        'censor': robjects.IntVector(np.array(censor)),
        'split': robjects.IntVector(np.array(split))
    }
    df = robjects.DataFrame(data)

    surv = importr('survival')
    grdevices = importr('grDevices')
    km = surv.survfit(robjects.Formula('Surv(time, censor) ~ split'), data=df)
    grdevices.png(file=os.path.join(outdir, name + '_km.png'),
                  width=512,
                  height=512)
    try:
        r.plot(km,
               xlab='Time',
               ylab='Cumulative Hazard',
               col=robjects.StrVector(['Red', 'Blue']))
        r.legend(1000,
                 1,
                 robjects.StrVector(['<= Mean', '> Mean']),
                 lty=robjects.IntVector([1, 1]),
                 col=robjects.StrVector(['Red', 'Blue']))
    finally:
        # Always close the device, even when plotting fails; otherwise the
        # open PNG device leaks and later plots are drawn into it.
        grdevices.dev_off()
Esempio n. 16
0
def build_tree(label_struct, label_column, features, method='anova'):
    """Fit an R ``rpart`` tree of the label on the given features.

    Args:
        label_struct: mapping whose ``"notes"`` entry is a sequence of
            per-sample mappings.
        label_column: key of the response value in each note.
        features: names of the predictor keys in each note; their values
            are converted with ``int``.
        method: ``rpart`` method.  With ``'anova'`` the response is used as
            is; with any other method it is converted to an R factor.

    Returns:
        The fitted ``rpart`` object.
    """
    # make formula
    fmla = ro.Formula(" y ~ %s " % " + ".join(features))
    env = fmla.environment
    notes = label_struct["notes"]

    # One R vector per feature, stored in the formula's environment.
    for fname in features:
        fvec = r.c()
        for n in notes:
            fvec = r.c(fvec, int(n[fname]))
        env[fname] = fvec
        # Formatted single-argument call: same output on Python 2 and 3.
        print("%s ---- %s" % (fname, fvec))

    fyvec = r.c()
    for n in notes:
        fyvec = r.c(fyvec, n[label_column])

    if method == 'anova':
        env['y'] = fyvec
    else:
        env['y'] = r.factor(fyvec)
        print(r.levels(env['y']))

    r('library(rpart)')
    return r('rpart')(fmla, method=method)
 def SVM_fitting_R(self,formula,dataframe):
     """Fit R's ``svm`` (linear kernel, cost 10, scale 0) and return the model.

     ``formula`` is an R formula string and ``dataframe`` the data it is
     evaluated on.
     """
     # Import the e1071 package before looking ``svm`` up in R.
     importr('e1071')
     svm_function = robjects.r["svm"]
     return svm_function(formula=robjects.Formula(formula),
                         data=dataframe,
                         kernel="linear",
                         cost=10,
                         scale=0)
Esempio n. 18
0
def test_regress(x):
    """Fit ``y ~ x`` with R's ``lm`` on random data and print the results.

    The ``x`` argument is ignored: it is immediately replaced by 100
    uniform random values, and ``y = 1 + x + uniform noise``.

    Returns:
        ``(coeffs, resids, fitted_vals, rsquared, se)`` where ``se`` is
        elements 2-3 of the summary's ``coefficients`` component.
    """
    stats = importr('stats')

    x = random.uniform(0, 1, 100).reshape([100, 1])
    y = 1 + x + random.uniform(0, 1, 100).reshape([100, 1])

    formula = robjects.Formula('y~x')

    # The model variables are looked up in the formula's own environment.
    env = formula.environment
    env['x'] = create_r_matrix(x, x.shape[1])
    env['y'] = create_r_matrix(y, y.shape[1])

    fit = stats.lm(formula)

    coeffs = stats.coef(fit)
    resids = stats.residuals(fit)
    fitted_vals = stats.fitted(fit)
    modsum = base.summary(fit)
    rsquared = modsum.rx2('r.squared')
    se = modsum.rx2('coefficients')[2:4]

    # Formatted single-argument calls: same output on Python 2 and 3.
    for label, value in (("coeffs", coeffs),
                         ("resids", resids),
                         ("fitted_vals", fitted_vals),
                         ("rsquared", rsquared),
                         ("se", se)):
        print("%s: %s" % (label, value))

    return (coeffs, resids, fitted_vals, rsquared, se)
Esempio n. 19
0
def render_plot(gp, args):
  """Render a plot using ggplot

  :gp: A base ggplot2 object
  :args: Options, wrapped in ``util.Namespace``; the attributes read are
         ``x`` and ``y`` (aesthetic expressions), ``type`` ('points',
         'lines' or 'boxplot') and ``facets`` (formula string for
         ``facet_grid``, or None)

  Raises NotImplementedError (an ``Exception`` subclass) for any other
  ``type``.  Failures while adding the facets or while drawing are logged
  as warnings and otherwise ignored.
  """
  args = util.Namespace(args)

  import logging
  import rpy2.robjects.lib.ggplot2 as ggplot2

  log = logging.getLogger(__name__)

  pp = gp + ggplot2.aes_string(x=args.x,
                               y=args.y)

  if args.type == 'points':
    pp += ggplot2.geom_point()
  elif args.type == 'lines':
    pp += ggplot2.geom_line()
  elif args.type == 'boxplot':
    pp += ggplot2.geom_boxplot()
  else:
    raise NotImplementedError("{0} not implemented".format(args.type))

  if args.facets is not None:
    try:
      pp += ggplot2.facet_grid(ro.Formula(args.facets))
    except Exception:
      # Still best effort, but no longer silent.
      log.warning("could not apply facets %r; plotting without them",
                  args.facets, exc_info=True)

  try:
    pp.plot()
  except Exception:
    log.warning("rendering the plot failed", exc_info=True)
Esempio n. 20
0
def run_nlme(jacobian):
    """Fit the ``nlme.lme`` model (method "ML") for one Jacobian vector.

    The module-level ``Subject``, ``Visit``, ``Age`` and ``Gender`` values
    and the module-level ``random_effects`` are used alongside ``jacobian``.

    Returns:
        numpy float64 array of length 12: ``[0:6]`` the fixed-effect
        coefficients, ``[6:12]`` column 4 of the ``tTable``.  All zeros when
        R raises a runtime error.
    """
    # The formula must be created inside the function to avoid funny results
    # due to concurrent execution.
    fixed_effects = ro.Formula(
        'Jacobian ~ I(Age^2) + Gender:I(Age^2) + Age + Gender:Age + Gender')
    env = fixed_effects.environment

    # model variables
    env["Subject"] = Subject
    env["Visit"] = Visit
    env["Age"] = Age
    env["Gender"] = Gender

    # the response for this call
    env["Jacobian"] = ro.FloatVector(jacobian)

    # output buffer
    result = np.zeros(12, dtype=np.float64, order='C')

    try:
        # linear mixed-effect model
        model_summary = base.summary(
            nlme.lme(fixed_effects, random=random_effects, method="ML"))

        # coefficients
        result[:6] = model_summary.rx2('coefficients').rx2('fixed')[:]
        # t-values
        result[6:] = model_summary.rx2('tTable').rx(True, 4)[:]
    except RRuntimeError:
        # probably the model did not converge
        pass

    return result
Esempio n. 21
0
def _edger_func_fit_glm(the_data, the_method, the_formula, common_disp=False, **vars):
    """Fit an edgeR GLM and return ``(fit, design)``.

    Args:
        the_data: pandas object with the counts, converted to R.
        the_method: ``'GLM'`` (``glmFit``) or ``'QLGLM'`` (``glmQLFit``).
        the_formula: R formula string for ``model.matrix``.
        common_disp: estimate a common dispersion instead of using the
            design (helpful when there are no replicates).
        **vars: formula variables; each is stored as an R factor in the
            formula's environment.

    Raises:
        NotImplementedError: for any other ``the_method``.
    """
    fit_functions = {'GLM': 'glmFit', 'QLGLM': 'glmQLFit'}
    if the_method not in fit_functions:
        raise NotImplementedError("Only GLM and QLGLM methods are supported at present")
    rdata = pandas2ri.py2ri(the_data)

    formula = robjects.Formula(the_formula)
    for var_name, var_values in vars.items():
        formula.environment[var_name] = robjects.FactorVector(var_values)

    dge = r("DGEList")(rdata)
    dge = r("calcNormFactors")(dge)
    design = r("model.matrix")(formula)

    if common_disp:
        # use a common estimate of the dispersion rather than using experimental structure
        # this is helpful where we have no replicates
        dge = r("estimateGLMCommonDisp")(dge, method='deviance', robust=True, subset=robjects.NULL)
    else:
        dge = r("estimateDisp")(dge, design)

    fit = r(fit_functions[the_method])(dge, design)
    return fit, design
Esempio n. 22
0
def test_setenvironment():
    """A formula reports the environment assigned via ``setenvironment``."""
    formula = robjects.Formula('y ~ x')
    replacement = robjects.baseenv['new.env']()

    # A freshly created environment is not the formula's current one ...
    assert not replacement.rsame(formula.getenvironment())

    # ... until it is explicitly assigned.
    formula.setenvironment(replacement)
    assert replacement.rsame(formula.getenvironment())
Esempio n. 23
0
 def testReprNonVectorInList(self):
     """The repr of a ListVector names the type of a non-vector element."""
     items = OrderedDict()
     items['a'] = 1
     items['b'] = robjects.Formula('y ~ x')

     repr_lines = repr(robjects.ListVector(items)).split('\n')
     # The third line of the repr lists the element types.
     self.assertEqual('[IntVector, Formula]', repr_lines[2].strip())
Esempio n. 24
0
    def _fit_survival(self, X, event, time):
        """Fit ``Surv(time, event) ~ .`` on ``X`` joined with both outcomes.

        ``time`` and ``event`` must carry a ``name``; those names are used in
        the formula and are excluded from the fit features.
        """
        frame = pandas.concat((X, time, event), axis=1)

        surv_formula = robjects.Formula(
            "Surv({0}, {1}) ~ .".format(time.name, event.name))
        r_frame = _convert_to_r(frame)
        fit_params = self._get_r_params()

        def is_feature(column):
            # Anything but the two outcome columns counts as a feature.
            return column != time.name and column != event.name

        self._set_fit_features(is_feature, X)
        self._fit(surv_formula, r_frame, fit_params)
Esempio n. 25
0
 def testSetenvironment(self):
     """A formula reports the environment assigned via ``setenvironment``."""
     formula = robjects.Formula("y ~ x")
     replacement = robjects.baseenv['new.env']()

     # Not the formula's environment before the assignment ...
     self.assertFalse(replacement.rsame(formula.getenvironment()))

     # ... and the very same one afterwards.
     formula.setenvironment(replacement)
     self.assertTrue(replacement.rsame(formula.getenvironment()))
Esempio n. 26
0
    def boxplotFormulae(self, outPath, x, y, dataframe, **kwargs):
        """
        Makes a boxplot out of an x and y formulae and a dataframe.
        Uses: http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/boxplot.html

        The plot is drawn on a PNG device opened on outPath. The device is left
        open; close it with R.r['dev.off']() as shown in the example.

        @type outPath: string
        @param outPath: Path for the output file
        @type x: rpy2.robjects.vectors.FloatVector or rpy2.robjects.vectors.IntVector
        @param x: First argument given to R.Formula (see  http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_formulae.html for details)
        @type y: rpy2.robjects.vectors.FloatVector or rpy2.robjects.vectors.IntVector
        @param y: Second argument given to R.Formula (see  http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_formulae.html for details)
        @type dataframe: rpy2.robjects.DataFrame
        @param dataframe: An R dataframe with in the columns the values for each boxplot and as column name the name for the x-axis
        @param kwargs: Additional arguments. See defaultdict in getParams documentation for a full list of possible arguments.
        @raise TypeError: plotArgs not a dictionary
        @raise TypeError: dataframe is not of type dataframe
        @raise TypeError: x is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector
        @raise TypeError: y is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector

        B{Example}:

        Plotting a boxplot. The boxplot from dataframe is easier to use.

        >>> import rpy2.robjects as R
        >>> x = R.IntVector([1,1,4,0,3])
        >>> y = R.IntVector([0,0,4,3,2])
        >>> dataframe = R.DataFrame({'a':R.IntVector([1,1,4,0,3]),'b':R.IntVector([0,0,4,3,2]), 'c':R.IntVector([2,3,4,2,1])})
        >>> plots = rPlots.Plots()
        >>> plots.boxplotFormulae('example_output.png', x, y, dataframe, width=400, height=400,
        ...                       title='feature and ms/ms per intensity', xlab = 'log 10 of intensity', ylab = '# of MS/MS per feature' )
        >>> R.r['dev.off']()
        """
        # Call-style ``raise`` is valid on both Python 2 and Python 3.
        if not isinstance(dataframe, R.DataFrame):
            raise TypeError('dataframe is not of type rpy2.robjects.DataFrame. Instead, is of type: '+str(type(dataframe)))

        numeric_vectors = (R.IntVector, R.FloatVector)
        if not isinstance(x, numeric_vectors):
            raise TypeError('x given to boxplotFormulae is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector. Instead, is of type: '+str(type(x)))
        if not isinstance(y, numeric_vectors):
            raise TypeError('y given to boxplotFormulae is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector. Instead, is of type: '+str(type(y)))

        # getting the parameter values (the ones given to the function, or default if the param wasn't given to the function)
        params = self.getParams(dict(**kwargs))

        # Validate plotArgs before the device is opened, so a bad value no
        # longer leaves a half-used PNG device behind.
        plot_args = params['plotArgs']
        if plot_args is None:
            # nothing extra to forward to boxplot
            plot_args = {}
        elif not isinstance(plot_args, dict):
            raise TypeError('plotArgs given to rFeaturePerIntensityHistogram has to be of type dict. Instead, is of type: '+str(type(plot_args)))

        formula = R.Formula('x ~ y')
        env = formula.environment
        env['x'] = x
        env['y'] = y

        R.r.png(outPath, width=params['width'], height=params['height'])
        # Draw once, with the labels (the plot used to be rendered twice: an
        # unlabelled pass immediately followed by the labelled one).
        R.r['boxplot'](formula, data = dataframe, main = params['title'], ylab = params['ylab'], xlab = params['xlab'], **plot_args)
Esempio n. 27
0
def regressSpec(w, wL, X): #,sigma2=1,intercept=True):
    """Fit ``Xa ~ abs(a*(wL) / (wL^2 + (s+1/rT2)^2 ))`` with R's ``nls``.

    ``Xa`` is ``abs(X)`` and ``s`` is ``-1j*w``.  The fit uses the 'port'
    algorithm, starts from ``a=.2, rT2=0.03`` and is bounded by
    ``a in [.001, 1.5]`` and ``rT2 in [.001, .300]``.

    Side effect: ``a``, ``rT2``, ``wL`` and ``nb`` are written to R's global
    environment.

    Returns:
        ``(a, rT2, nb)``: entries 0, 1 and 2 of ``$par`` of the fit summary.
        NOTE(review): only ``a`` and ``rT2`` are fitted (``nb`` was removed
        from the start list and the formula), so entry 2 is not an ``nb``
        estimate -- confirm what callers expect here.
    """

    # compute s
    s = -1j*w

    # TODO, if regression fails, it might be because there is no exponential
    # term, maybe do a second regression then on a linear model. 
    a   = 0                  # Linear 
    rT2 = 0.1                # T2 regressed
    r   = robjects.r         

    # Variable shared between R and Python
    robjects.globalenv['a'] = a
    robjects.globalenv['rT2'] = rT2
    robjects.globalenv['wL'] = wL
    robjects.globalenv['nb'] = 0

    # R-side copies of the data in several forms; only `s` and `Xa` appear in
    # the active formula, the others serve the commented-out alternatives.
    s = robjects.ComplexVector(numpy.array(s))
    XX = robjects.ComplexVector(X)
    Xr = robjects.FloatVector(numpy.real(X))
    Xi = robjects.FloatVector(numpy.imag(X))
    Xa = robjects.FloatVector(numpy.abs(X))
    Xri = robjects.FloatVector(numpy.concatenate((Xr,Xi)))
    
    # Bounds, start values and control settings for nls.
    #my_lower = robjects.r('list(a=.001, rT2=.001, nb=.0001)')
    my_lower = robjects.r('list(a=.001, rT2=.001)')
    #my_upper = robjects.r('list(a=1.5, rT2=.300, nb =100.)')
    my_upper = robjects.r('list(a=1.5, rT2=.300)')
     
    #my_list = robjects.r('list(a=.2, rT2=0.03, nb=.1)')
    my_list = robjects.r('list(a=.2, rT2=0.03)')
    my_cont = robjects.r('nls.control(maxiter=5000, warnOnly=TRUE, printEval=FALSE)')
    
    #fmla = robjects.Formula('Xri ~ c(a*Re((wL) / (wL^2+(s+1/rT2)^2 )), a*Im((wL)/(wL^2 + (s+1/rT2)^2 )))') # envelope
    ##fmla = robjects.Formula('Xri ~ c(a*Re((wL) / (wL^2+(s+1/rT2)^2 )), a*Im((wL)/(wL^2 + (s+1/rT2)^2 )))') # envelope
    #fmla = robjects.Formula('XX ~ a*(wL) / (wL^2 + (s+1/rT2)^2 )') # complex
    #fmla = robjects.Formula('Xa ~ abs(a*(wL) / (wL^2 + (s+1/rT2)^2 )) + nb') # complex
    fmla = robjects.Formula('Xa ~ abs(a*(wL) / (wL^2 + (s+1/rT2)^2 ))') # complex
 
    # The data vectors live in the formula's environment.
    env = fmla.getenvironment()
    env['s'] = s
    env['Xr'] = Xr
    env['Xa'] = Xa
    env['Xi'] = Xi
    env['Xri'] = Xri
    env['XX'] = XX
     
    #fit = robjects.r.tryCatch(robjects.r.nls(fmla,start=my_list, control=my_cont)) #, lower=my_lower, algorithm='port')) #, \
    # NOTE(review): Python evaluates the nls call before tryCatch is invoked,
    # so tryCatch only receives the finished result and cannot intercept an
    # nls failure.
    fit = robjects.r.tryCatch(robjects.r.nls(fmla, start=my_list, control=my_cont, lower=my_lower, upper=my_upper, algorithm='port')) #, \
    report =  r.summary(fit)
    #print report 
    #print  r.warnings()
 
    # Read the results through R's `$` operator on the summary.
    a  =  r['$'](report,'par')[0]
    rT2 =  r['$'](report,'par')[1]
    nb =  r['$'](report,'par')[2]
    
    return a, rT2, nb
Esempio n. 28
0
def quaReg(x, y):
    """Stage ``x`` and ``y`` in R for a quantile regression.

    The values are assigned in R as ``xr`` and ``yr`` and combined into the
    R data frame ``dt``.  Nothing is returned.

    NOTE(review): the function looks unfinished -- the formula is empty,
    ``qreg`` is never used and no model is fitted.
    """
    import rpy2.robjects as robjects
    rObj = robjects.r
    # Import the quantreg R package.
    qreg = robjects.packages.importr(name='quantreg')

    rObj.assign('xr', robjects.FloatVector(x))
    rObj.assign('yr', robjects.FloatVector(y))
    rObj("dt <- data.frame(xr,yr)")
    fmla = robjects.Formula('')
Esempio n. 29
0
def test_repr_nonvectorinlist():
    """The repr of a ListVector names the low-level type of each element."""
    entries = OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ])
    expected_prefix = ("R object with classes: ('list',) mapped to:%s"
                       "[IntSexpVector, LangSexpVector]" % os.linesep)

    text = repr(robjects.ListVector(entries))
    assert text.startswith(expected_prefix)
Esempio n. 30
0
def get_breakpoints_and_levels(id_user):
    """Return the fitted level at the start and at each breakpoint.

    The pickled model of ``id_user`` is read from ``../data/models/`` and its
    ``'freq'`` series is segmented with R's ``breakpoints`` on the constant
    model ``freq_tweet ~ 1``.

    Args:
        id_user: file name of the pickled model inside ``../data/models/``.

    Returns:
        dict mapping ``freq.index[i]`` to the fitted value at ``i`` for
        ``i = 0`` and for every entry of the first component of the
        ``breakpoints`` result.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # model files from a trusted source.
    # The context manager closes the file (it used to be left open).
    with open('../data/models/{}'.format(id_user), 'rb') as model_file:
        model = pickle.load(model_file)

    freq = model['freq']
    formula = robjects.Formula('freq_tweet ~ 1')
    env = formula.environment
    env['freq_tweet'] = robjects.r['ts'](robjects.FloatVector(freq.values),  start=freq.min())

    breakpoints = robjects.r['breakpoints'](formula)
    fitted = robjects.r['fitted'](breakpoints, breaks=len(breakpoints[0]))

    positions = [0.0] + list(breakpoints[0])
    return {freq.index[int(i)]: fitted[int(i)] for i in positions}