def anova_shape_r_nonoptimal(model, sdata):
    """Compare a full and a reduced R linear model for every phenotype column.

    Each column of ``sdata.phenotype_array`` is used in turn as the
    ``response`` variable. ``lm`` is fitted with ``model.fullmodel`` and with
    ``model.nullmodel``, and the two fits are compared with R's ``anova``.

    Args:
        model: Object providing ``fullmodel`` and ``nullmodel`` (right-hand
            sides of R formulas) and ``unique`` (name of the coefficient of
            interest in the full model).
        sdata: Object providing ``phenotype_array`` (indexed ``[:, i]``) and
            ``create_r_pre_data_frame(model)``.

    Returns:
        StatsOutput: ``pvalues``, ``pvalues_signed`` and ``tvalues`` filled
        for every column.
    """
    pre_data_frame = sdata.create_r_pre_data_frame(model)
    n_columns = sdata.phenotype_array.shape[1]
    statsout = StatsOutput(dim=n_columns)

    # None of these depend on the column, so build them once.
    robj = robjects.r
    full_formula = robjects.Formula('response' + ' ~ ' + model.fullmodel)
    reduced_formula = robjects.Formula('response' + ' ~ ' + model.nullmodel)

    for i in range(n_columns):
        pre_data_frame['response'] = robjects.FloatVector(
            sdata.phenotype_array[:, i])
        dataframe = robjects.DataFrame(pre_data_frame)

        fit_full = robj.lm(full_formula, data=dataframe)
        fit_reduced = robj.lm(reduced_formula, data=dataframe)
        model_diff = robj.anova(fit_full, fit_reduced)

        coefficients = fit_full.rx2('coefficients')
        coefficient = coefficients[coefficients.names.index(model.unique)]
        direction = np.sign(coefficient)

        # Row 1 of the 'Pr(>F)' column holds the p-value of the comparison.
        pvalue = model_diff[model_diff.names.index('Pr(>F)')][1]

        statsout.pvalues[i] = pvalue
        statsout.pvalues_signed[i] = direction * pvalue
        # NOTE(review): this stores the raw coefficient, not a t statistic,
        # despite the attribute name -- confirm that is intended.
        statsout.tvalues[i] = coefficient
    return statsout
def fit(self, log10=()):
    """Fit a penalized-spline GAM ``y ~ s(x, bs='ps')`` with ``mgcv.gam``.

    Args:
        log10: Iterable of axis names (case-insensitive); when it contains
            ``"x"`` and/or ``"y"`` the corresponding data are
            log10-transformed before fitting. The upper-cased names are
            stored in ``self.log10``.

    The fitted model is stored in ``self.spline``. If the REML fit raises
    ``RRuntimeError``, a warning is written to stderr and the fit is
    repeated with a fixed smoothing parameter (``sp=0.6``).
    """
    self.log10 = [axis.upper() for axis in log10]

    x_values = np.log10(self.x) if "X" in self.log10 else self.x
    x = ro.FloatVector(x_values)
    y_values = np.log10(self.y) if "Y" in self.log10 else self.y
    y = ro.FloatVector(y_values)
    weights = ro.FloatVector(self.weights)

    try:
        self.spline = self.mgcv.gam(
            ro.Formula("y ~ s(x, bs='ps')"),
            data=ro.DataFrame({"x": x, "y": y}),
            weights=weights, method="REML")
    except RRuntimeError:
        # NOTE:
        # If the input y data is constant (e.g., the temperature profile
        # has only ONE data point), the smoothing spline fit using method
        # "REML" fails with:
        #     Error in gam.reparam(UrS, sp, grderiv) :
        #       NA/NaN/Inf in foreign function call (arg 3)
        # or with:
        #     Error in eigen(hess1, symmetric = TRUE) :
        #       infinite or missing values in 'x'
        print("WARNING: 'mgcv.gam()' using method 'REML' failed!",
              file=sys.stderr)
        self.spline = self.mgcv.gam(
            ro.Formula("y ~ s(x, bs='ps', sp=0.6)"),
            data=ro.DataFrame({"x": x, "y": y}),
            weights=weights)
def km_plot_data(self, name, time, censor, values):
    """Build Kaplan-Meier plot data for a high/low split at the mean value.

    Samples are labelled ``high`` when ``value >= mean(values)``. The
    difference between the two groups is tested with ``survdiff`` and the
    survival curves are taken from ``survfit``.

    Args:
        name: Unused by this method.
        time: Survival times.
        censor: Event indicators passed to ``Surv(time, censor)``.
        values: Values that are split at their mean.

    Returns:
        dict: ``{'high': [...], 'low': [...], 'p': float}`` where each list
        holds ``{'percent': ..., 'time': ...}`` points and ``p`` is the
        p-value rounded to four significant digits.
    """
    values_df = pd.DataFrame(
        {'time': time, 'censor': censor, 'value': values}, dtype=float)
    values_df['high'] = values_df.value >= values_df.value.mean()

    data = {
        'time': robjects.FloatVector(values_df['time']),
        'censor': robjects.IntVector(values_df['censor']),
        'high': robjects.IntVector(values_df['high']),
    }
    df = robjects.DataFrame(data)
    formula = robjects.Formula('Surv(time, censor) ~ high')

    # p value: chi-square statistic of survdiff with one degree of freedom
    km_diff = self.surv.survdiff(formula, data=df)
    chisq = km_diff[list(km_diff.names).index('chisq')][0]
    pvalue = chi2.sf(chisq, 1)

    km = self.surv.survfit(formula, data=df)

    # The summary is evaluated on the R side; note that this assigns
    # 'km', 'times', 'res' and 'cols' in the R global environment.
    r.assign('km', km)
    r.assign('times', data['time'])
    r.assign('res', r('summary(km, times=times)'))
    r.assign('cols', r('lapply(c(2:6, 8:11), function(x) res[x])'))
    km_results = pd.DataFrame(r('do.call(data.frame, cols)'))

    def curve(stratum):
        # Turn one stratum of the summary into a list of plot points.
        rows = km_results[km_results['strata'] == stratum]
        times, percents = self.make_plottable_kms(rows['time'], rows['surv'])
        return [{'percent': percent, 'time': when}
                for percent, when in zip(percents, times)]

    low_rows_first = curve('high=0')
    high_rows = curve('high=1')
    return {'high': high_rows, 'low': low_rows_first,
            'p': float('%.4g' % pvalue)}
def get_surv_fit(surv, feature=None, covariates=None, interactions=None,
                 formula=None, time_cutoff=5):
    """Summarise a ``survfit`` model as a table of survival statistics.

    Args:
        surv, feature, covariates: Forwarded to ``process_covariates``.
        interactions: Forwarded to ``get_formula`` when ``formula`` is None.
        formula: Optional R formula string; overrides the generated one.
        time_cutoff: Time at which the survival estimate is reported.

    Returns:
        pandas.DataFrame: One row per stratum with ``Stats``,
        ``Median Survival`` and ``'<time_cutoff>y Survival'`` column groups.
    """
    df, factors = process_covariates(surv, feature, covariates)
    if formula is None:
        fmla = robjects.Formula(get_formula(factors, interactions))
    else:
        fmla = robjects.Formula(formula)

    fit = survival.survfit(fmla, df)
    summary = base.summary(fit, times=robjects.r.c(time_cutoff))
    table = summary.rx2('table')
    res = convert_robj(table)
    if isinstance(res, list):
        # The table came back as a flat list (presumably the single-stratum
        # case -- confirm); rebuild it as a one-row frame.
        res = pd.DataFrame({'feature=all': pd.Series(table, table.names)}).T

    res = res.rename(index=lambda label: label.split('=')[1])
    res = res[['records', 'events', 'median', '0.95LCL', '0.95UCL']]
    res.columns = pd.MultiIndex.from_tuples([
        ('Stats', '# Patients'),
        ('Stats', '# Events'),
        ('Median Survival', 'Median'),
        ('Median Survival', 'Lower'),
        ('Median Survival', 'Upper'),
    ])

    survival_group = str(time_cutoff) + 'y Survival'
    estimates = ['surv', 'lower', 'upper']
    if feature is None:
        for field in estimates:
            res[(survival_group, field.capitalize())] = summary.rx2(field)
    else:
        # Materialise the labels: a lazy ``map`` object is not a valid index.
        strata_labels = [label.replace('feature=', '')
                         for label in summary.rx2('strata').iter_labels()]
        strata_df = pd.DataFrame(
            {field: list(summary.rx2(field))
             for field in ['strata'] + estimates},
            index=strata_labels)
        for field in estimates:
            res[(survival_group, field.capitalize())] = strata_df[field]

    # Best effort: use integer labels when every label is numeric.
    try:
        res.index = [int(label) for label in res.index]
    except (ValueError, TypeError):
        pass
    return res
def fit(self, X, y):
    """Fit a cross-validated glmnet model through the R formula interface.

    Args:
        X: Feature matrix, either a pandas DataFrame or an array that can be
            concatenated with ``y`` column-wise.
        y: Target values.

    Returns:
        self, with the fitted R model stored in ``self.model``.
    """
    target_column_name = 'target__'
    if isinstance(X, pd.DataFrame):
        # Work on a copy so the caller's frame keeps its column names and
        # does not gain a target column.
        X = X.copy()
        X.columns = sanitize_column_names(list(X))
        X[target_column_name] = y
    else:
        y = y.reshape((-1, 1))
        X = np.concatenate((y, X), axis=1)
        # The target is the first column of the concatenated array.
        target_column_name = 'X0'

    X_r, X = fix_types(X)
    predictors = list(set(list(X)) - set([target_column_name]))
    formula = robj.Formula(additive([target_column_name], None, predictors))

    if self._categorical_target():
        target_index = X_r.colnames.index(target_column_name)
        X_r[target_index] = robj.FactorVector(X_r.rx2(target_column_name))

    cv_glmnet = STM(
        self._utils.cv_glmnet,
        init_prm_translate={
            'use_model_frame': 'use.model.frame',
            'nan_action': 'na.action',
        })
    self.model = cv_glmnet(
        formula,
        data=X_r,
        alpha=self.alpha,
        family=self.family,
        nan_action=NAN_ACTIONS_TO_R[self.nan_action],
        intercept=self.fit_intercept,
        thresh=self.epsilon,
        maxit=self.max_iter)
    self.model.rclass = robj.StrVector(('cv.glmnet.formula', 'cv.glmnet'))
    return self
def fit_vgam_v1P_helper(dataset, PTO_coords, g, df):
    """Fit a negative-binomial VGAM spline and run a likelihood-ratio test.

    Args:
        dataset: pandas object indexed by the labels in ``PTO_coords.index``.
        PTO_coords: pandas Series of X coordinates.
        g: Unused by this function.
        df: Degrees of freedom inserted into ``s(X, df = ...)``.

    Returns:
        tuple: ``(predicted, stat_a, stat_b)`` where ``predicted`` is the list
        of fitted responses on ``arange(0, max(PTO_coords), 1)`` and the two
        statistics are entries ``[3][1]`` and ``[4][1]`` of the ``lrtest``
        result body. On any failure ``(nan, nan, nan)`` is returned.
    """
    try:
        # define values
        predict_x = np.arange(0, np.max(PTO_coords), 1)
        X = robjects.FloatVector(list(PTO_coords.values))
        # ``.ix`` no longer exists in pandas; the lookup is by label.
        Y = robjects.FloatVector(list(dataset.loc[PTO_coords.index]))
        DF = robjects.DataFrame({'X': X, 'Y': Y})

        # fit full model
        smooth_term = "s(X, df = %s)" % df
        fmla_full = robjects.Formula("Y ~ " + smooth_term)
        fit_full = rvgam.vgam(fmla_full, rvgam.negbinomial, data=DF)

        # predict values
        DF_pred = robjects.DataFrame({'X': robjects.FloatVector(predict_x)})
        predicted = rvgam.predict(fit_full, newdata=DF_pred, type='response')

        # perform LR test
        lr_body = rvgam.lrtest(fit_full, smooth_term).do_slot('Body')
        return list(predicted), lr_body[3][1], lr_body[4][1]
    except Exception:
        # Deliberate best effort: a failed fit yields NaNs. Unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        return np.nan, np.nan, np.nan
def get_cox_ph(surv, feature=None, covariates=None, formula=None,
               interactions=True, get_model=True, print_desc=False):
    """Fit a Cox proportional hazards model to the data.

    When ``formula`` is None the model comes from ``cox_model_selection``;
    otherwise the covariates are processed with ``process_covariates`` and
    ``survival.coxph`` is fitted with the given formula.

    Args:
        surv, feature, covariates: Forwarded to ``cox_model_selection`` or
            ``process_covariates``.
        formula: Optional R formula string for the model.
        interactions: Forwarded to ``cox_model_selection``.
        get_model: Return the fitted model when true.
        print_desc: Print the last two blank-line-separated sections of the
            model's string representation.

    Returns:
        The fitted model when ``get_model`` is true, otherwise None.
    """
    if formula is None:
        model = cox_model_selection(surv, feature, covariates, interactions)
    else:
        df, _ = process_covariates(surv, feature, covariates)
        model = survival.coxph(robjects.Formula(formula), df)

    if print_desc:
        print('\n\n'.join(str(model).split('\n\n')[-2:]))

    if get_model:
        return model
    return None
def learnModel(self, X, Y):
    """Train the R model on examples ``X`` with labels ``Y``.

    Both arguments must be numpy arrays that pass ``Parameter.checkArray``.
    A 1-D ``Y`` is turned into a single column. The data frame built by
    ``self._getDataFrame`` is fitted with the formula ``class ~ .`` via
    ``self.learnModelDataFrame``.

    Raises:
        ValueError: if ``Y`` contains fewer than two distinct labels.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(Y)

    # Only "fewer than two distinct labels" is rejected here; more than two
    # labels pass this check even though the message says "binary".
    if numpy.unique(Y).shape[0] < 2:
        raise ValueError(
            "Vector of labels must be binary, currently numpy.unique(Y) = " +
            str(numpy.unique(Y)))

    # If Y is 1D make it 2D
    if Y.ndim == 1:
        Y = numpy.array([Y]).T

    XY = self._getDataFrame(X, Y)
    formula = robjects.Formula('class ~ .')
    self.learnModelDataFrame(formula, XY)

    # Trigger garbage collection on both the Python and the R side.
    gc.collect()
    robjects.r('gc(verbose=TRUE)')
    robjects.r('memory.profile()')
    gc.collect()

    if self.printMemStats:
        logging.debug(self.getLsos()())
        # The profiler receives this function's locals, so the local
        # variable names above are part of the logged output.
        logging.debug(ProfileUtils.memDisplay(locals()))
def feglm(
    fml: str,
    data: pd.DataFrame,
    se: Optional[str] = None,
    **kwargs,
) -> FixestResult:
    """Wrapper for calling fixest::feglm in R.

    Only the columns named in ``fml`` (and in ``kwargs["cluster"]`` when
    given) are passed to R, after dropping rows with missing values in any
    of them.

    Args:
        fml: fixest formula as a string.
        data: Data containing every variable named in the formula.
        se: Standard-error type. Defaults to ``"cluster"`` when a
            ``cluster`` keyword is supplied and to ``"hetero"`` otherwise.
        **kwargs: Forwarded unchanged to ``fixest::feglm``.

    Returns:
        FixestResult wrapping the R fit.
    """
    if se is None:
        se = "cluster" if "cluster" in kwargs else "hetero"

    # Every word-like token of the formula is taken to be a column name,
    # except the intercept marker "1".
    names = set(re.findall(r"[\w']+", fml))
    names.discard("1")
    if "cluster" in kwargs:
        # Union instead of concatenation: a cluster variable that also
        # appears in the formula must not be selected twice.
        names |= set(re.findall(r"[\w']+", kwargs["cluster"]))
    columns = sorted(names)

    result = fixest.feglm(  # pylint: disable=no-member
        robjects.Formula(fml),
        data=data[columns].dropna(subset=columns),
        se=se,
        **kwargs,
    )
    return FixestResult(result, se=se)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit a model-based recursive partitioning tree with R's ``glmtree``.

    Args:
        X: Feature matrix, either a pandas DataFrame or an array that can be
            concatenated with ``y`` column-wise.
        y: Target values.
        sample_weight: Optional per-row weights; defaults to all ones.
        **kwargs: Accepted but not used by this method.

    Returns:
        self, with the fitted R model stored in ``self.model``. Note that
        ``self.factor_columns`` is replaced by its sanitized form.
    """
    if self.random_state is not None:
        r('set.seed(' + str(self.random_state) + ')')

    if self.reg_columns is None:
        reg_columns = []
    else:
        reg_columns = sanitize_column_names(self.reg_columns)
    factor_columns = [] if self.factor_columns is None else self.factor_columns
    if sample_weight is None:
        sample_weight = [1] * X.shape[0]

    target_column_name = 'target__'
    if isinstance(X, pd.DataFrame):
        # Work on a copy so the caller's frame keeps its column names and
        # does not gain a target column.
        X = X.copy()
        X.columns = sanitize_column_names(list(X))
        X[target_column_name] = y
    else:
        y = y.reshape((-1, 1))
        X = np.concatenate((y, X), axis=1)
        # The target is the first column of the concatenated array.
        target_column_name = 'X0'

    self.factor_columns = sanitize_column_names(factor_columns)
    X_r, X = fix_types(X, factor_columns=self.factor_columns)

    # Everything that is neither the target nor a regressor goes into the
    # third term group of the formula.
    remaining = list(
        set(list(X)) - set([target_column_name]) - set(reg_columns))
    formula = robj.Formula(
        additive([target_column_name], reg_columns, remaining))

    self.model = r.glmtree(formula, data=X_r, family=self.family,
                           weights=np.array(sample_weight))
    return self
def run_lme(signal, mask):
    """Fit ``signal ~ group + visit + (1|subject)`` on the masked samples.

    Samples with ``mask > 0.5`` are kept. The returned array of length 8
    holds: the number of kept samples, the ``df`` entry at index 2 of the
    coefficient table, three estimates and three t-values. Entries other
    than the count stay zero when four or fewer samples are kept or when R
    raises an error.
    """
    effects = ro.Formula('signal ~ group + visit + (1|subject)')

    selected = mask > 0.5
    good_voxels = np.sum(selected)
    rm = ro.BoolVector(selected)

    # assign variables
    env = effects.environment
    env["mask"] = rm
    env["signal"] = ro.FloatVector(signal).rx(rm)
    env["subject"] = subject.rx(rm)
    env["visit"] = visit.rx(rm)
    env["group"] = group.rx(rm)

    # allocate space for output
    result = np.zeros(8)
    result[0] = good_voxels

    if good_voxels <= 4:
        # not enough information
        return result

    try:
        # run linear mixed-effect model
        model_summary = base.summary(lme.lmer(effects))
        # extract DF (for the visit)
        result[1] = model_summary.rx2('coefficients').rx(True, 'df')[2]
        # extract coefficients
        result[2:5] = model_summary.rx2('coefficients').rx(True, 'Estimate')[:]
        # extract t-values
        result[5:8] = model_summary.rx2('coefficients').rx(True, 't value')[:]
    except RRuntimeError:
        # probably model didn't converge
        pass
    return result
def call_DESeq2(self, count_data, samples, conditions):
    """Run DESeq2 with the design ``~ condition`` and return its results.

    @count_data is a DataFrame with one column per sample.
    @samples is a list of sample names.
    @conditions is a list with the condition of each sample; it is the
    variable being contrasted.

    Returns the DESeq2 result table converted back to a DataFrame.
    """
    import rpy2.robjects as robjects
    import rpy2.robjects.numpy2ri as numpy2ri
    import mbf_r

    counts = np.array(count_data.values)
    n_rows, n_cols = counts.shape
    # R receives a flat vector that is refilled row by row.
    flat_counts = counts.reshape(counts.size)
    r_counts = robjects.r.matrix(
        numpy2ri.py2rpy(flat_counts), nrow=n_rows, ncol=n_cols, byrow=True
    )

    # Setting and then dropping the sample index leaves only 'condition'.
    sample_table = pd.DataFrame({"sample": samples, "condition": conditions})
    sample_table = sample_table.set_index("sample").reset_index(drop=True)
    r_col_data = mbf_r.convert_dataframe_to_r(
        pd.DataFrame(sample_table.to_dict("list"))
    )

    make_dataset = robjects.r("DESeqDataSetFromMatrix")
    experiment = make_dataset(
        countData=r_counts,
        colData=r_col_data,
        design=robjects.Formula("~ condition"),
    )
    experiment = robjects.r("DESeq")(experiment)

    # NOTE(review): the contrast levels "c" and "base" are hard-coded --
    # confirm callers always label their conditions this way.
    contrast = robjects.r("c")("condition", "c", "base")
    res = robjects.r("results")(experiment, contrast=contrast)
    return mbf_r.convert_dataframe_from_r(robjects.r("as.data.frame")(res))
def stratified_regression(target, feature, strata):
    """F-test for adding ``feature`` to a linear model with a strata term.

    Fits ``target ~ feature + strata(strata)`` and the null model
    ``target ~ strata(strata)`` and compares them with R's ``anova``.

    Args:
        target, feature, strata: Named objects (each must have a ``name``
            after ``sanitize_for_r``) used as model variables.

    Returns:
        pandas.Series with entries ``'F'`` and ``'p'``.
    """
    target, feature, strata = map(sanitize_for_r, [target, feature, strata])
    df_r = process_factors([strata, target, feature])

    full_formula = robjects.Formula(
        '{} ~ {} + strata({})'.format(target.name, feature.name, strata.name))
    null_formula = robjects.Formula(
        '{} ~ strata({})'.format(target.name, strata.name))

    fit = lm(full_formula, df_r)
    fit_null = lm(null_formula, df_r)

    # Run the comparison once; columns 4 and 5 of the anova table are read
    # as the F statistic and its p-value (row 1 is the full model).
    comparison = robjects.r.anova(fit_null, fit)
    return pd.Series({'F': comparison[4][1], 'p': comparison[5][1]})
def create_atlas(expressionDatafile, sampleDatafile):
    """Build the atlas expression matrix from the input files.

    expressionDatafile is the full path to a csv file (readable with
    pandas.read_csv) holding all datasets concatenated column-wise.
    sampleDatafile is the full path to the csv file with the sample
    information; it must contain a 'Platform_Category' column, which is the
    variable used to filter out platform-dependent genes.

    In the expression matrix, RNASeq data should have zeros as zeros, not
    nans: rows containing any nan are dropped.
    """
    # rpy2 bridges to R; the variancePartition R package must be installed.
    import rpy2
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    import rpy2.robjects.numpy2ri, rpy2.robjects.pandas2ri
    rpy2.robjects.numpy2ri.activate()
    rpy2.robjects.pandas2ri.activate()
    variancePartition = importr('variancePartition')

    # Expression data without genes that are missing in any dataset, and
    # the sample metadata.
    data = pandas.read_csv(expressionDatafile).dropna(how='any')
    metadata = pandas.read_csv(sampleDatafile)

    # Fraction of variance explained by the platform, per gene.
    platform_formula = robjects.Formula('~ Platform_Category')
    platform_info = metadata[['Platform_Category']]
    varPart = variancePartition.fitExtractVarPartModel(
        transform_to_percentile(data), platform_formula, platform_info)

    # Filtering step: keep genes whose first variance component is <= 0.2.
    passes_filter = numpy.array(varPart)[0] <= 0.2
    genes_to_keep = data.index.values[passes_filter]

    # NOTE(review): ``aransform`` is not defined in the visible code; check
    # whether ``transform_to_percentile`` was meant.
    return aransform(data.loc[genes_to_keep].copy())
def do_km(name, time, censor, split, outdir):
    """Given three clean (pre-processed) lists, make a kmplot of the data,
    and save it to outdir.

    Args:
        name: Prefix of the output file; the plot is written to
            ``<outdir>/<name>_km.png``.
        time: Survival times (converted to integers).
        censor: Event indicators (converted to integers).
        split: Group label of every sample (converted to integers).
        outdir: Directory for the PNG file.
    """
    data = {
        'time': robjects.IntVector(np.array(time)),
        'censor': robjects.IntVector(np.array(censor)),
        'split': robjects.IntVector(np.array(split)),
    }
    df = robjects.DataFrame(data)

    surv = importr('survival')
    grdevices = importr('grDevices')
    km = surv.survfit(robjects.Formula('Surv(time, censor) ~ split'), data=df)

    colors = robjects.StrVector(['Red', 'Blue'])
    grdevices.png(file=os.path.join(outdir, name + '_km.png'),
                  width=512, height=512)
    try:
        # NOTE(review): the y label says 'Cumulative Hazard' although a
        # survfit object is plotted -- confirm the label.
        r.plot(km, xlab='Time', ylab='Cumulative Hazard', col=colors)
        r.legend(1000, 1, robjects.StrVector(['<= Mean', '> Mean']),
                 lty=robjects.IntVector([1, 1]), col=colors)
    finally:
        # Always close the device, otherwise a failed plot leaves it open.
        grdevices.dev_off()
def build_tree(label_struct, label_column, features, method='anova'):
    """Fit an ``rpart`` tree predicting ``label_column`` from ``features``.

    Args:
        label_struct: Mapping whose ``"notes"`` entry is a sequence of
            records indexable by feature name and by ``label_column``.
        label_column: Key of the response value in every record.
        features: Feature names; each value is converted with ``int``.
        method: rpart method. For anything other than ``'anova'`` the
            response is converted to an R factor.

    Returns:
        The fitted rpart model.
    """
    # make formula
    fmla = ro.Formula(" y ~ %s " % " + ".join(features))
    env = fmla.environment
    notes = label_struct["notes"]

    for fname in features:
        # One c() call per feature instead of growing the vector element
        # by element, which copies it on every step.
        fvec = r.c(*[int(note[fname]) for note in notes])
        env[fname] = fvec
        print("%s ---- %s" % (fname, fvec))

    fyvec = r.c(*[note[label_column] for note in notes])
    if method == 'anova':
        env['y'] = fyvec
    else:
        env['y'] = r.factor(fyvec)
        print(r.levels(env['y']))

    r('library(rpart)')
    return r('rpart')(fmla, method=method)
def SVM_fitting_R(self, formula, dataframe):
    """Fit a linear-kernel SVM (cost 10, ``scale = 0``) with R's e1071.

    ``formula`` is an R formula string and ``dataframe`` the data it refers
    to. Returns the fitted R model.
    """
    # Loading the package is what makes ``svm`` available in R.
    importr('e1071')
    fit_svm = robjects.r["svm"]
    return fit_svm(
        formula=robjects.Formula(formula),
        data=dataframe,
        kernel="linear",
        cost=10,
        scale=0,
    )
def test_regress(x):
    """Fit ``y ~ x`` with R's ``lm`` on random data and print the results.

    Args:
        x: Ignored; it is overwritten with 100 uniform random values.

    Returns:
        tuple: ``(coeffs, resids, fitted_vals, rsquared, se)`` where ``se``
        is the slice ``[2:4]`` of the summary's coefficient table.
    """
    stats = importr('stats')
    x = random.uniform(0, 1, 100).reshape([100, 1])
    y = 1 + x + random.uniform(0, 1, 100).reshape([100, 1])

    formula = robjects.Formula('y~x')
    env = formula.environment
    env['x'] = create_r_matrix(x, x.shape[1])
    env['y'] = create_r_matrix(y, y.shape[1])

    fit = stats.lm(formula)
    coeffs = stats.coef(fit)
    resids = stats.residuals(fit)
    fitted_vals = stats.fitted(fit)
    modsum = base.summary(fit)
    rsquared = modsum.rx2('r.squared')
    se = modsum.rx2('coefficients')[2:4]

    for label, value in (
        ("coeffs", coeffs),
        ("resids", resids),
        ("fitted_vals", fitted_vals),
        ("rsquared", rsquared),
        ("se", se),
    ):
        print("%s: %s" % (label, value))
    return (coeffs, resids, fitted_vals, rsquared, se)
def render_plot(gp, args):
    """Render a plot using ggplot.

    Args:
        gp: A base ggplot2 object.
        args: Plot options, wrapped in ``util.Namespace``. The fields read
            are ``x`` and ``y`` (value expressions), ``type`` (``'points'``,
            ``'lines'`` or ``'boxplot'``) and ``facets`` (an R formula
            string, or None for no faceting).

    Raises:
        Exception: if ``args.type`` is not one of the supported types.

    Faceting and drawing stay best effort, but a failure is now logged as a
    warning instead of being discarded.
    """
    import logging

    args = util.Namespace(args)
    import rpy2.robjects.lib.ggplot2 as ggplot2
    log = logging.getLogger(__name__)

    pp = gp + ggplot2.aes_string(x=args.x, y=args.y)
    if args.type == 'points':
        pp += ggplot2.geom_point()
    elif args.type == 'lines':
        pp += ggplot2.geom_line()
    elif args.type == 'boxplot':
        pp += ggplot2.geom_boxplot()
    else:
        raise Exception("{0} not implemented".format(args.type))

    if args.facets is not None:
        try:
            pp += ggplot2.facet_grid(ro.Formula(args.facets))
        except Exception:
            log.warning("could not apply facets %r; plotting without them",
                        args.facets, exc_info=True)

    try:
        pp.plot()
    except Exception:
        log.warning("rendering the plot failed", exc_info=True)
def run_nlme(jacobian):
    """Fit the Jacobian mixed-effects model with ``nlme.lme`` (method ML).

    Returns an array of 12 values: the six fixed-effect coefficients
    followed by column 4 of the summary's ``tTable``. Values not extracted
    before an R error stay zero.
    """
    # The formula is created inside the function to avoid funny results
    # due to concurrent execution.
    fixed_effects = ro.Formula(
        'Jacobian ~ I(Age^2) + Gender:I(Age^2) + Age + Gender:Age + Gender')

    # assign variables
    env = fixed_effects.environment
    env["Subject"] = Subject
    env["Visit"] = Visit
    env["Age"] = Age
    env["Gender"] = Gender
    # update jacobian variable
    env["Jacobian"] = ro.FloatVector(jacobian)

    # allocate space for output
    result = np.zeros(shape=[12], dtype=np.float64, order='C')

    try:
        # run linear mixed-effect model
        fitted = nlme.lme(fixed_effects, random=random_effects, method="ML")
        model_summary = base.summary(fitted)
        # extract coefficients
        result[0:6] = model_summary.rx2('coefficients').rx2('fixed')[:]
        # extract t-values
        result[6:12] = model_summary.rx2('tTable').rx(True, 4)[:]
    except RRuntimeError:
        # probably model didn't converge
        pass
    return result
def _edger_func_fit_glm(the_data, the_method, the_formula, common_disp=False, **vars):
    """Fit an edgeR GLM and return ``(fit, design)``.

    ``the_method`` selects ``glmFit`` ('GLM') or ``glmQLFit`` ('QLGLM'); any
    other value raises NotImplementedError. Every keyword in ``vars`` is
    placed in the formula's environment as an R factor before the design
    matrix is built from ``the_formula``.
    """
    fit_functions = {'GLM': 'glmFit', 'QLGLM': 'glmQLFit'}
    if the_method not in fit_functions:
        raise NotImplementedError("Only GLM and QLGLM methods are supported at present")

    rdata = pandas2ri.py2ri(the_data)
    formula = robjects.Formula(the_formula)
    for variable, values in vars.items():
        formula.environment[variable] = robjects.FactorVector(values)

    counts = r("calcNormFactors")(r("DGEList")(rdata))
    design = r("model.matrix")(formula)

    if not common_disp:
        counts = r("estimateDisp")(counts, design)
    else:
        # use a common estimate of the dispersion rather than using
        # experimental structure; this is helpful where we have no replicates
        counts = r("estimateGLMCommonDisp")(
            counts, method='deviance', robust=True, subset=robjects.NULL)

    fit = r(fit_functions[the_method])(counts, design)
    return fit, design
def test_setenvironment():
    """A formula reports the environment assigned via ``setenvironment``."""
    formula = robjects.Formula('y ~ x')
    fresh_env = robjects.baseenv['new.env']()

    # Before the assignment the formula lives in a different environment.
    original_env = formula.getenvironment()
    assert not fresh_env.rsame(original_env)

    formula.setenvironment(fresh_env)
    current_env = formula.getenvironment()
    assert fresh_env.rsame(current_env)
def testReprNonVectorInList(self):
    """The repr of a list holding a formula names the element types."""
    items = OrderedDict()
    items['a'] = 1
    items['b'] = robjects.Formula('y ~ x')
    vec = robjects.ListVector(items)

    repr_lines = repr(vec).split('\n')
    # The third line of the repr lists the element classes.
    self.assertEqual('[IntVector, Formula]', repr_lines[2].strip())
def _fit_survival(self, X, event, time):
    """Fit the R survival model ``Surv(time, event) ~ .``.

    ``X``, ``time`` and ``event`` are concatenated column-wise; the names of
    ``time`` and ``event`` are used in the formula and excluded from the
    fit features.
    """
    def is_feature(column):
        # Everything except the two response columns is a feature.
        return column != time.name and column != event.name

    combined = pandas.concat((X, time, event), axis=1)
    response = "Surv({0}, {1}) ~ .".format(time.name, event.name)
    formula = robjects.Formula(response)

    rdata = _convert_to_r(combined)
    params = self._get_r_params()
    self._set_fit_features(is_feature, X)
    self._fit(formula, rdata, params)
def testSetenvironment(self):
    """A formula reports the environment assigned via ``setenvironment``."""
    formula = robjects.Formula("y ~ x")
    fresh_env = robjects.baseenv['new.env']()

    # Before the assignment the formula lives in a different environment.
    original_env = formula.getenvironment()
    self.assertFalse(fresh_env.rsame(original_env))

    formula.setenvironment(fresh_env)
    current_env = formula.getenvironment()
    self.assertTrue(fresh_env.rsame(current_env))
def boxplotFormulae(self, outPath, x, y, dataframe, **kwargs):
    """
    Makes a boxplot out of an x and y formulae and a dataframe. Uses:
    http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/boxplot.html

    The png device opened here is left open; the caller closes it with
    R.r['dev.off']() (see the example).

    @type outPath: string
    @param outPath: Path for the output file
    @type x: rpy2.robjects.vectors.FloatVector or rpy2.robjects.vectors.IntVector
    @param x: First argument given to R.Formula (see http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_formulae.html for details)
    @type y: rpy2.robjects.vectors.FloatVector or rpy2.robjects.vectors.IntVector
    @param y: Second argument given to R.Formula (see http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_formulae.html for details)
    @type dataframe: rpy2.robjects.DataFrame
    @param dataframe: An R dataframe with in the columns the values for each boxplot and as column name the name for the x-axis
    @param kwargs: Additional arguments. See defaultdict in getParams documentation for a full list of possible arguments.

    @raise TypeError: plotArgs not a dictionary
    @raise TypeError: dataframe is not of type dataframe
    @raise TypeError: x is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector
    @raise TypeError: y is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector

    B{Example}:

    Plotting a boxplot. The boxplot from dataframe is easier to use.

    >>> import rpy2.robjects as R
    >>> x = R.IntVector([1,1,4,0,3])
    >>> y = R.IntVector([0,0,4,3,2])
    >>> dataframe = R.DataFrame({'a':R.IntVector([1,1,4,0,3]),'b':R.IntVector([0,0,4,3,2]), 'c':R.IntVector([2,3,4,2,1])})
    >>> plots = rPlots.Plots()
    >>> plots.boxplotFormulae('example_output.png', x, y, dataframe, width=400, height=400,
    ...             title='feature and ms/ms per intensity', xlab = 'log 10 of intensity', ylab = '# of MS/MS per feature' )
    >>> R.r['dev.off']()
    """
    vector_types = (R.IntVector, R.FloatVector)
    if not isinstance(dataframe, R.DataFrame):
        raise TypeError('dataframe is not of type rpy2.robjects.DataFrame. Instead, is of type: ' + str(type(dataframe)))
    if not isinstance(x, vector_types):
        raise TypeError('x given to boxplotFormulae is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector. Instead, is of type: ' + str(type(x)))
    if not isinstance(y, vector_types):
        raise TypeError('y given to boxplotFormulae is not of type rpy2.robjects.vectors.FloatVector or of type rpy2.robjects.vectors.IntVector. Instead, is of type: ' + str(type(y)))

    # getting the parameter values (the ones given to the function, or
    # default if the param wasn't given to the function)
    params = self.getParams(dict(**kwargs))

    # Validate plotArgs before the device is opened, so that invalid input
    # does not leave an open png device behind.
    plotArgs = params['plotArgs']
    if plotArgs is not None and not isinstance(plotArgs, dict):
        raise TypeError('plotArgs given to boxplotFormulae has to be of type dict. Instead, is of type: ' + str(type(plotArgs)))

    formula = R.Formula('x ~ y')
    env = formula.environment
    env['x'] = x
    env['y'] = y

    R.r.png(outPath, width=params['width'], height=params['height'])

    # plotArgs cannot be passed on when it is None, so fall back to no
    # extra arguments. The plot is drawn once, with its titles.
    extra_args = {} if plotArgs is None else plotArgs
    R.r['boxplot'](formula, data=dataframe, main=params['title'],
                   ylab=params['ylab'], xlab=params['xlab'], **extra_args)
def regressSpec(w, wL, X): #,sigma2=1,intercept=True):
    """Fit the magnitude of a spectrum with R's ``nls`` (port algorithm).

    The model ``abs(a*wL / (wL^2 + (s + 1/rT2)^2))`` with ``s = -1j*w`` is
    fitted to ``abs(X)``. ``a`` starts at 0.2 and ``rT2`` at 0.03; both are
    box-constrained (a in [0.001, 1.5], rT2 in [0.001, 0.3]).

    Side effect: ``a``, ``rT2``, ``wL`` and ``nb`` are assigned in the R
    global environment.

    Returns:
        tuple ``(a, rT2, nb)`` read from entries 0, 1 and 2 of ``report$par``.
    """
    # compute s
    s = -1j*w
    # TODO, if regression fails, it might be because there is no exponential
    # term, maybe do a second regression then on a linear model.
    a = 0 # Linear
    rT2 = 0.1 # T2 regressed
    r = robjects.r
    # Variable shared between R and Python
    robjects.globalenv['a'] = a
    robjects.globalenv['rT2'] = rT2
    robjects.globalenv['wL'] = wL
    robjects.globalenv['nb'] = 0
    # R-side copies of the data: complex, real part, imaginary part,
    # magnitude, and real/imaginary parts concatenated.
    s = robjects.ComplexVector(numpy.array(s))
    XX = robjects.ComplexVector(X)
    Xr = robjects.FloatVector(numpy.real(X))
    Xi = robjects.FloatVector(numpy.imag(X))
    Xa = robjects.FloatVector(numpy.abs(X))
    Xri = robjects.FloatVector(numpy.concatenate((Xr,Xi)))
    # Bounds, start values and control settings for nls. Earlier variants
    # (now removed from the active code) also carried an ``nb`` parameter:
    #   lower: list(a=.001, rT2=.001, nb=.0001)
    #   upper: list(a=1.5, rT2=.300, nb=100.)
    #   start: list(a=.2, rT2=0.03, nb=.1)
    my_lower = robjects.r('list(a=.001, rT2=.001)')
    my_upper = robjects.r('list(a=1.5, rT2=.300)')
    my_list = robjects.r('list(a=.2, rT2=0.03)')
    my_cont = robjects.r('nls.control(maxiter=5000, warnOnly=TRUE, printEval=FALSE)')
    # Alternative formulations that were tried:
    #   envelope: 'Xri ~ c(a*Re((wL) / (wL^2+(s+1/rT2)^2 )), a*Im((wL)/(wL^2 + (s+1/rT2)^2 )))'
    #   complex:  'XX ~ a*(wL) / (wL^2 + (s+1/rT2)^2 )'
    #   offset:   'Xa ~ abs(a*(wL) / (wL^2 + (s+1/rT2)^2 )) + nb'
    fmla = robjects.Formula('Xa ~ abs(a*(wL) / (wL^2 + (s+1/rT2)^2 ))') # magnitude fit
    env = fmla.getenvironment()
    # Only ``s`` and ``Xa`` appear in the active formula; the other vectors
    # are used by the alternative formulations listed above.
    env['s'] = s
    env['Xr'] = Xr
    env['Xa'] = Xa
    env['Xi'] = Xi
    env['Xri'] = Xri
    env['XX'] = XX
    # NOTE(review): nls is evaluated from Python before tryCatch receives
    # its result, and no handlers are given, so tryCatch presumably does
    # not catch anything here -- confirm.
    fit = robjects.r.tryCatch(robjects.r.nls(fmla, start=my_list, control=my_cont, lower=my_lower, upper=my_upper, algorithm='port'))
    report = r.summary(fit)
    a = r['$'](report,'par')[0]
    rT2 = r['$'](report,'par')[1]
    # NOTE(review): the active model has no ``nb`` parameter, so entry 2 is
    # not an nb estimate; ``$par`` presumably partial-matches the summary's
    # parameter table, where entry 2 would be a standard error -- verify.
    nb = r['$'](report,'par')[2]
    return a, rT2, nb
def quaReg(x, y):
    """Prepare R for a quantile regression of ``y`` on ``x``.

    Loads the ``quantreg`` package and creates the R variables ``xr``,
    ``yr`` and the data frame ``dt``. Nothing is returned.

    NOTE(review): the function ends after building an empty formula, so it
    looks unfinished -- confirm before relying on it.
    """
    import rpy2.robjects as robjects

    r_session = robjects.r
    qreg = robjects.packages.importr(name='quantreg')

    for r_name, values in (('xr', x), ('yr', y)):
        r_session.assign(r_name, robjects.FloatVector(values))
    r_session("dt <- data.frame(xr,yr)")

    fmla = robjects.Formula('')
def test_repr_nonvectorinlist():
    """The repr of a list holding a formula names the low-level types."""
    items = OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ])
    vec = robjects.ListVector(items)

    expected_prefix = ("R object with classes: ('list',) mapped to:%s"
                       "[IntSexpVector, LangSexpVector]" % os.linesep)
    text = repr(vec)
    assert text.startswith(expected_prefix)
def get_breakpoints_and_levels(id_user):
    """Detect level shifts in a user's stored frequency series.

    The pickled model ``../data/models/<id_user>`` is loaded, its ``'freq'``
    series is turned into an R time series and passed to R's
    ``breakpoints`` with the formula ``freq_tweet ~ 1``.

    Args:
        id_user: Name of the model file inside ``../data/models``.

    Returns:
        dict: Maps the index label at position 0 and at every breakpoint
        position to the fitted value at that position.
    """
    model_path = '../data/models/{}'.format(id_user)
    # NOTE: unpickling runs arbitrary code; only load trusted model files.
    # The context manager closes the file, which the previous
    # ``open(...).read()`` never did.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    freq = model['freq']

    formula = robjects.Formula('freq_tweet ~ 1')
    env = formula.environment
    env['freq_tweet'] = robjects.r['ts'](
        robjects.FloatVector(freq.values), start=freq.min())

    breakpoints = robjects.r['breakpoints'](formula)
    fitted = robjects.r['fitted'](breakpoints, breaks=len(breakpoints[0]))

    # Position 0 is always reported together with every breakpoint.
    positions = [0.0] + list(breakpoints[0])
    return {freq.index[int(i)]: fitted[int(i)] for i in positions}