def run_lrgs(x, y, err_x, err_y, _xycov=None, nmc=500, dirichlet=True):
    '''
    Run the R ``lrgs`` regression (Gibbs sampler) through rpy2.

    For our purposes the inputs should be in scaled (log) form.  Only the
    on-diagonal elements of the per-point covariance matrix are filled in
    for the moment; ``_xycov`` is accepted but not used.

    ``nmc`` is the length of the Markov chain and ``dirichlet`` is forwarded
    to ``Gibbs_regression`` as an R logical.

    Returns ``(intercept, slope, sigma)``, each taken from the posterior
    chains, consistently with run_linmix.
    '''
    # pylint: disable = too-many-arguments
    # pylint: disable = too-many-locals

    # All four inputs must hold the same number of elements.
    assert np.size(x) == np.size(y)
    assert np.size(err_x) == np.size(err_y)
    assert np.size(x) == np.size(err_x)

    # R-side copies of the data and their uncertainties.
    r_x = robjects.FloatVector(x)
    r_y = robjects.FloatVector(y)
    r_err_x = robjects.FloatVector(err_x)
    r_err_y = robjects.FloatVector(err_y)

    # One 2x2 matrix per data point; only the diagonal is populated.
    n_points = np.size(r_x)
    cov = RARRAY(0.0, dim=RC(2, 2, n_points))
    for idx in range(n_points):
        cov.rx[1, 1, idx + 1] = r_err_x[idx]
        cov.rx[2, 2, idx + 1] = r_err_y[idx]

    # R logical equivalent of the ``dirichlet`` switch.
    r_true = robjects.BoolVector([True])
    r_false = robjects.BoolVector([False])
    r_dirichlet = r_true if dirichlet else r_false

    # Run the MCMC.
    posterior = RLRGS.Gibbs_regression(
        r_x, r_y, cov, nmc,
        dirichlet=r_dirichlet, trace='bsg', mention_every=50)

    # posterior[0] holds the parameter chain, posterior[1] the scatter chain
    # (only intrinsic scatter for the moment!).
    param_chain = np.array(posterior[0])
    scatter_chain = np.array(posterior[1])[0][0]

    return (param_chain[0][0], param_chain[1][0], np.sqrt(scatter_chain))
def run_lme(signal, mask):
    """Fit ``signal ~ group + visit + (1|subject)`` on the masked entries.

    Entries where ``mask > 0.5`` are selected.  ``subject``, ``visit`` and
    ``group`` are R vectors taken from the enclosing scope and subset with
    the same selection.

    Returns a length-8 array:
      [0]   number of selected entries,
      [1]   element 2 of the ``df`` column of the coefficient table,
      [2:5] the ``Estimate`` column,
      [5:8] the ``t value`` column.
    Slots 1-7 stay zero when four or fewer entries are selected or when R
    raises ``RRuntimeError``.
    """
    selected = mask > 0.5
    good_voxels = np.sum(selected)

    effects = ro.Formula('signal ~ group + visit + (1|subject)')
    env = effects.environment
    rm = ro.BoolVector(selected)
    env["mask"] = rm
    env["signal"] = ro.FloatVector(signal).rx(rm)
    # Model covariates, restricted to the same entries.
    env["subject"] = subject.rx(rm)
    env["visit"] = visit.rx(rm)
    env["group"] = group.rx(rm)

    result = np.zeros(8)
    result[0] = good_voxels

    # Not enough information to fit the model.
    if not good_voxels > 4:
        return result

    try:
        coefficients = base.summary(lme.lmer(effects)).rx2('coefficients')
        # Degrees of freedom (original comment: "for the visit").
        result[1] = coefficients.rx(True, 'df')[2]
        result[2:5] = coefficients.rx(True, 'Estimate')[:]
        result[5:8] = coefficients.rx(True, 't value')[:]
    except RRuntimeError:
        # Probably the model didn't converge; keep whatever was filled in.
        pass
    return result
def get_response_matrix(self):
    """Build an R data frame of per-question student responses.

    The data frame has one column per question, keyed by the 1-based
    question number.  Each column is a ``BoolVector`` with one entry per
    student (``student.is_right(question_index)``).  Questions flagged with
    ``discard`` get a column of ``NA_Logical`` instead; per the original
    note, this is how questions that would crash ltm (100% or 0% correct)
    or that the user opted to drop are excluded.

    Returns:
        robjects.DataFrame: the response matrix.
    """
    student_count = len(self.students)
    columns = {}
    for question_index in range(self.test_length):
        if self.questions[question_index].discard:
            # Discarded question: keep the column but fill it with NA.
            responses = [robjects.NA_Logical] * student_count
        else:
            responses = [student.is_right(question_index)
                         for student in self.students]
        columns[question_index + 1] = robjects.BoolVector(responses)

    return robjects.DataFrame(columns)
def convert_dict(obj):
    """Convert a homogeneous Python collection into the matching R vector.

    The element types of ``obj`` are inspected in this order:

    * all ``str``            -> ``ro.StrVector``
    * all ``bool``           -> ``ro.BoolVector``
    * all ``int``            -> ``ro.IntVector``
    * all ``int`` or ``float`` -> ``ro.FloatVector``
    * anything else          -> ``ro.ListVector``

    ``bool`` is tested before ``int`` because ``bool`` is a subclass of
    ``int``; testing the other way round would send booleans to
    ``IntVector``.  Mixed int/float input is widened to a float vector
    rather than being forced into an integer vector.

    An empty ``obj`` satisfies the first test and yields an empty
    ``StrVector``.
    """
    if all(isinstance(item, str) for item in obj):
        return ro.StrVector(obj)
    if all(isinstance(item, bool) for item in obj):
        return ro.BoolVector(obj)
    if all(isinstance(item, int) for item in obj):
        return ro.IntVector(obj)
    if all(isinstance(item, (int, float)) for item in obj):
        return ro.FloatVector(obj)
    return ro.ListVector(obj)
def pathifier(disease_name):
    """Score pathway deregulation for a disease with the R pathifier package.

    Reads the network model and the data for ``disease_name``, applies the
    'metabolic-standard' preprocessing, groups the measured metabolites by
    model subsystem (skipping subsystems whose name starts with 'Transport'
    or 'Exchange'), calls ``quantify_pathways_deregulation`` and writes the
    per-pathway scores, with the labels in a leading 'stage' column, to
    ``../dataset/disease/<disease_name>_regulization.csv``.

    Samples labelled ``'h'`` in ``y`` are passed to R as the healthy group.
    """
    model = DataReader().read_network_model()
    X, y = DataReader().read_data(disease_name)

    pre = DynamicPreprocessing(['metabolic-standard'])
    X = pre.fit_transform(X, y)

    df = pd.DataFrame(X)
    # R fills matrices column-major, hence the transpose before flattening.
    metabolite_fold_changes = robj.r.matrix(
        robj.FloatVector(df.values.T.ravel().tolist()),
        nrow=df.shape[1])
    all_metabolite_ids = robj.StrVector(list(df))

    # subsystem name -> ids of its metabolites that are columns of df
    subsystem_metabolite = defaultdict(set)
    for r in model.reactions:
        if r.subsystem and not r.subsystem.startswith(('Transport',
                                                       'Exchange')):
            subsystem_metabolite[r.subsystem].update(
                m.id for m in r.metabolites if m.id in df)

    # Keep only subsystems with at least one measured metabolite.
    pathway_names, pathway_metabolites = zip(
        *filter(lambda x: x[1], subsystem_metabolite.items()))
    pathway_metabolites = robj.r['list'](
        *map(lambda x: robj.StrVector(list(x)), pathway_metabolites))
    pathway_names = robj.StrVector(list(pathway_names))
    is_healthy = robj.BoolVector(list(map(lambda x: x == 'h', y)))

    # Named *_pkg so the local does not shadow this function's own name.
    pathifier_pkg = importr("pathifier")
    result = pathifier_pkg.quantify_pathways_deregulation(
        metabolite_fold_changes,
        all_metabolite_ids,
        pathway_metabolites,
        pathway_names,
        is_healthy,
        attempts=100,
        min_exp=0,
        min_std=0)

    regScores = dict()
    for pathway, scores in dict(result.items())['scores'].items():
        regScores[pathway] = list(scores[:])

    df = pd.DataFrame(regScores)
    df.insert(0, 'stage', y)
    df.to_csv('../dataset/disease/%s_regulization.csv' % disease_name,
              index=False)
def as_r_vector(o, val_type):
    """Wrap ``o`` in an R vector chosen by ``val_type``.

    ``int``, ``float`` and ``bool`` select ``IntVector``, ``FloatVector``
    and ``BoolVector``; any other ``val_type`` falls back to ``RVector``.
    When ``o`` is a dict, its values are converted the same way and its
    keys are set as the names of the resulting vector.
    """
    if isinstance(o, dict):
        names = o.keys()
        named_vec = as_r_vector([o[name] for name in names],
                                val_type=val_type)
        named_vec.setnames(names)
        return named_vec

    if val_type == int:
        return robjects.IntVector(o)
    if val_type == float:
        return robjects.FloatVector(o)
    if val_type == bool:
        return robjects.BoolVector(o)
    return robjects.RVector(o)
def pyArrayToRVector(X, rName=None, nanToNA=True):
    """Copy an array-like object into a vector in the R workspace.

    The input is flattened first; the R vector type is picked from the
    name of the resulting numpy dtype.

    Args:
        X (array-like): Array to convert. Entries must be integer, float,
            or boolean type.
        rName (str): Name of the R variable to assign the vector to. If
            None, a name is obtained from ``genVarName()``.
        nanToNA (bool): If True, entries for which R's ``is.nan`` holds are
            replaced by NA after assignment. nan in Python typically stands
            for missing data, which R represents as NA (R has both NaN and
            NA; Python has no dedicated missing-value type).

    Returns:
        str: The R variable to which the vector is assigned.

    Raises:
        NotImplementedError: If the dtype of X is unsupported.
    """
    X = np.asarray(X).ravel()
    dtype_name = str(X.dtype)

    if dtype_name.startswith(('int', 'uint')):
        rVector = ro.IntVector(X)
    elif dtype_name.startswith('float'):
        rVector = ro.FloatVector(X)
    elif dtype_name.startswith('bool'):
        rVector = ro.BoolVector(X)
    else:
        raise NotImplementedError(
            'Only int, float, and bool are currently supported.'
        )

    target = genVarName() if rName is None else rName
    r.assign(target, rVector)

    if nanToNA:
        r('%s[ is.nan(%s) ] <- NA' % (target, target))

    return target
def heatmap(self, plotfile, zscore=False):
    '''
    Plot ``self.table`` as a clustered heatmap and save it to ``plotfile``
    (PNG, 1000x1000 px) using R's gplots ``heatmap.2``.

    set zscore=True to use a divergent colour scale
    '''
    # to do: add option to parse design file and add coloured row for
    # variable specified in design file.
    r_source = '''
    function(df, zscore){
      library("Biobase")
      library("RColorBrewer")
      library("gplots")
      if(zscore[1]==TRUE){
        #hmcol <- colorRampPalette(colors = c("red", "white", "blue"))
        PuOr <- brewer.pal(11, "PuOr")
        hmcol <- c(colorRampPalette(c(PuOr[1], PuOr[6]))(100),
                   colorRampPalette(c(PuOr[6], PuOr[11]))(100)[-1])
      } else{
        hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
      }
      png("%(plotfile)s", width=1000, height=1000, units="px")
      heatmap.2(as.matrix(df), col = hmcol, scale="none", trace="none",
                margin=c(18, 10), dendrogram="both", cexCol=2, labRow = "",
                hclustfun = function(x) hclust(x, method = 'average'),
                distfun = dist)
      dev.off()
    }''' % {"plotfile": plotfile}

    plotHeatmap = R(r_source)
    r_counts = pandas2ri.py2ri(self.table)
    r_zscore = ro.BoolVector([zscore])
    plotHeatmap(r_counts, r_zscore)
def make_rvector(col, ct=COLTYPE.FLOAT):
    """Make and return an R vector for data in `col` of COLTYPE ct.

    INT, FLOAT and BOOL map to the matching rpy2 vector.  STR is wrapped
    with R's ``I()`` so it is not converted into a factor, while FACTOR is
    passed as a plain string vector and left to R's automatic conversion.
    DATE columns are stored as floats: ``datetime.datetime`` values go
    through ``datetime_to_sec`` and float values are used as they are; the
    type of the first element decides which applies.

    Returns:
        robjects.Vector

    Raises:
        TypeError if the type is unknown
        TypeError if it is COLTYPE.DATE but not parseable
    """
    if ct == COLTYPE.INT:
        return robjects.IntVector(col)
    if ct == COLTYPE.FLOAT:
        return robjects.FloatVector(col)
    if ct == COLTYPE.STR:
        # Use I() from R.base library to avoid conversion
        # into a factor. Usually though a factor is what you want.
        return base.I(robjects.StrVector(col))
    if ct == COLTYPE.BOOL:
        return robjects.BoolVector(col)
    if ct == COLTYPE.FACTOR:
        # conversion will happen automatically
        return robjects.StrVector(col)
    if ct == COLTYPE.DATE:
        if len(col) == 0:
            # Nothing to inspect; an empty date column is an empty vector.
            return robjects.FloatVector(col)
        field = col[0]
        if isinstance(field, datetime.datetime):
            # Materialise the conversion: a lazy map object is not a
            # sequence on Python 3.
            tcol = [datetime_to_sec(value) for value in col]
        elif isinstance(field, float):
            tcol = col
        else:
            raise TypeError("Bad date type '%s'. "
                            "Expected datetime.datetime or float."
                            % (type(field),))
        return robjects.FloatVector(tcol)
    raise TypeError("Unknown column type '%s'." % (ct,))
def create_vector(v_list, desired_type=None):
    """Build the narrowest R vector able to hold every element of ``v_list``.

    Candidate types are tried in the order bool, int, float, str.  Each
    element rules out the types it cannot be stored as: an int rules out
    bool, a float rules out bool and int, a str rules out everything but
    str, and any other object rules out all four.  A bool rules out
    nothing, since it can be coerced to each of the other types.

    Args:
        v_list: sequence of Python values.
        desired_type: ``bool``, ``int``, ``float``, ``str`` or None.  When
            given, only that vector type is acceptable.

    Returns:
        A ``BoolVector``, ``IntVector``, ``FloatVector`` or ``StrVector``;
        a generic ``RVector`` when no typed vector fits and no
        ``desired_type`` was requested.

    Raises:
        TypeException: if ``desired_type`` is given and the elements cannot
            be stored as that type.
    """
    is_bool = is_int = is_float = is_str = True
    for elt in v_list:
        # bool must be tested first: it is a subclass of int.
        if isinstance(elt, bool):
            continue
        if isinstance(elt, int):
            is_bool = False
        elif isinstance(elt, float):
            is_bool = is_int = False
        elif isinstance(elt, str):
            is_bool = is_int = is_float = False
        else:
            is_bool = is_int = is_float = is_str = False
            break

    if is_bool and (desired_type is None or desired_type == bool):
        return robjects.BoolVector(v_list)
    elif is_int and (desired_type is None or desired_type == int):
        return robjects.IntVector([int(elt) for elt in v_list])
    elif is_float and (desired_type is None or desired_type == float):
        return robjects.FloatVector([float(elt) for elt in v_list])
    elif is_str and (desired_type is None or desired_type == str):
        return robjects.StrVector([str(elt) for elt in v_list])

    if desired_type is not None:
        raise TypeException("Cannot coerce vector to type '%s'" % desired_type)
    return robjects.RVector(v_list)
octave.eval("test_prob = predict(bbq_model, test_scores, 1)", verbose=False) bbq_prob = octave.pull('test_prob', verbose=False) bbq_prob = np.array([item[0] for item in bbq_prob]) bbq_metrics.append(isotonic.get_metrics(test_class, bbq_prob, k=k)) # Create isotonic regression model ir_model = IsotonicRegression(y_min=y_min, y_max=y_max, out_of_bounds='clip') ir_model.fit(X=training_scores, y=training_class) ir_prob = isotonic.predict(ir_model, test_scores) ir_metrics.append(isotonic.get_metrics(test_class, ir_prob, k=k)) # Create ENIR model using R: enir_model = enir.enir_build( robjects.FloatVector(training_scores.tolist()), robjects.BoolVector(training_class.tolist())) enir_prob = enir.enir_predict(enir_model, robjects.FloatVector(test_scores.tolist())) # Convert to numpy.array: enir_prob = np.array(enir_prob) enir_metrics.append(isotonic.get_metrics(test_class, enir_prob, k=k)) # Create weighted (by likelihood) averaged bootstrapped isotonic regression. # I am using the identical IR models for BIR, which is basically also an # ensemble model but where all models have equal weight. wabir_model = isotonic.train_wabir(training_class, training_scores) wabir_prob = isotonic.predict_wabir(wabir_model, test_scores) wabir_metrics.append(isotonic.get_metrics(test_class, wabir_prob, k=k)) # Estimating bir-probabilities using the same IR models as generated by wabir: bir_prob = isotonic.predict_wabir(wabir_model, test_scores,
raise TypeException("Cannot coerce vector to type '%s'" % desired_type) return robjects.RVector(v_list) def vector_conv(v, desired_type=None): v_list = eval(v) return create_vector(v_list, desired_type) RVector = new_constant('RVector', staticmethod(vector_conv), robjects.RVector([]), staticmethod(lambda x: isinstance(x, robjects.RVector))) def bool_vector_conv(v): return vector_conv(v, bool) RBoolVector = new_constant('RBoolVector' , staticmethod(bool_vector_conv), robjects.BoolVector([]), staticmethod(lambda x: isinstance(x, robjects.RVector)), base_class=RVector) def int_vector_conv(v): return vector_conv(v, int) RIntVector = new_constant('RIntVector' , staticmethod(int_vector_conv), robjects.IntVector([]), staticmethod(lambda x: isinstance(x, robjects.RVector)), base_class=RVector) def float_vector_conv(v): return vector_conv(v, float) RFloatVector = new_constant('RFloatVector' , staticmethod(float_vector_conv),
def predict(self, Xtest, num_predicted_frames=8, ycol0=0):
    ''' Make predictions of the next num_predicted_frames frames.

    Start at variable ycol0 only (do not predict the values of the
    first 0 to ycol0-1 variables).

    For each column from ycol0 on, the cumulative series in Xtest is
    differenced in place, an ARIMA model is fitted with R's ``forecast``
    package (grid search over (p, d, q) keeping the lowest value of
    ``forecast.accuracy(fit)[0][2]``), and element 3 of the forecast
    result is written into the matching column of Ytest.  Predictions are
    then re-accumulated from the last observed totals.

    Side effects visible here: Xtest is modified in place, pandas2ri
    conversion is activated, and the R package 'forecast' is installed
    into rpy2's own directory when ``isinstalled`` reports it missing.

    NOTE(review): the original docstring said "we predict persistence of
    the last frame", which does not match the ARIMA code below.
    '''
    vprint(self.verbose, "Model :: ========= Making predictions =========")
    vprint(self.verbose, "===============================================")
    start = time.time()
    #Ytest = np.array([Xtest[random.randint(0,10),ycol0:]] * num_predicted_frames)
    ######################
    # import rpy2's package module
    import rpy2
    import rpy2.robjects as robjects
    import rpy2.robjects.packages as rpackages
    from rpy2.robjects.packages import importr
    # import R's "base" package
    base = rpackages.importr('base')
    # import R's utility package
    utils = rpackages.importr('utils')
    # select a mirror for R packages
    utils.chooseCRANmirror(ind=1)  # select the first mirror in the list
    if rpy2.robjects.packages.isinstalled(
            'forecast', lib_loc=rpy2.__path__[0]) == False:
        utils.install_packages('forecast', lib=rpy2.__path__[0])
    forecast = importr('forecast', lib_loc=rpy2.__path__[0])
    ts = robjects.r('ts')
    #from rpy2.robjects.vectors import FloatVector
    #from rpy2.robjects.vectors import IntVector
    #from rpy2.robjects.vectors import BoolVector
    #from rpy2.robjects import pandas2ri
    from rpy2.robjects import pandas2ri
    from rpy2.robjects import vectors
    pandas2ri.activate()
    ######################
    # NOTE(review): output shape is fixed at (7, 57) regardless of
    # num_predicted_frames and Xtest.shape[1] -- confirm callers always
    # match these dimensions.
    Ytest = np.zeros((7, 57))
    # Code assumes daily data (not aggregated. Arima will break if it's run on aggregated data.
    # I've provided commented code that should undo aggrgation in inputs into model and redo
    # aggregation to return the predictions (Ytest)
    # undo aggregation:
    # Each column is turned from running totals into per-step increments;
    # ``init`` ends as the last running total and is remembered per column.
    future_starts = []
    for col in range(ycol0, Xtest.shape[1]):
        init = Xtest[0, col]
        for row in range(1, Xtest.shape[0]):
            Xtest[row, col] -= init
            init += Xtest[row, col]
        future_starts.append(init)
    for col in range(ycol0, Xtest.shape[1]):
        #print(col)
        dtp = num_predicted_frames - 1  # days to predict
        ndpat = num_predicted_frames  # number days to predict at a time
        dat = Xtest[1:, col]
        #print(dat)
        #print(len(dat))
        sum_RMSE = 0
        f = ts(dat, frequency=1, start=1, end=len(dat))
        best_params = robjects.IntVector([0, 0, 0])
        best_RMSE = 1000000
        # Grid search over the ARIMA order; fits that raise are skipped.
        for p in range(1, 5):
            for q in range(0, 5):
                for d in range(0, 3):
                    try:
                        t_order = robjects.IntVector([p, d, q])
                        fit2 = forecast.Arima(f, order=t_order,
                                              xreg=robjects.r("NULL"),
                                              include_mean=True,
                                              include_drift=False,
                                              biasadj=False,
                                              method="ML",
                                              model=robjects.r("NULL"))
                        RMSE = forecast.accuracy(fit2)[0][2]  #RMSE
                        if RMSE < best_RMSE:
                            best_RMSE = RMSE
                            best_params = robjects.IntVector([p, d, q])
                    # NOTE(review): bare except also hides non-R errors.
                    except:
                        continue
        best_opts = robjects.BoolVector([True, False])
        possible_opts = robjects.BoolVector([True, False])
        # NOTE(review): range(0, 1) yields only index 0, so only the
        # (True, True) combination is tried here -- confirm whether
        # range(0, 2) was intended. The loop variables are also rebound to
        # the option values inside the body.
        for mean_opt in range(0, 1):
            for drift_opt in range(0, 1):
                mean_opt = possible_opts[mean_opt]
                drift_opt = possible_opts[drift_opt]
                fit2 = forecast.Arima(f, order=best_params,
                                      xreg=robjects.r("NULL"),
                                      include_mean=mean_opt,
                                      include_drift=drift_opt,
                                      biasadj=False,
                                      method="ML",
                                      model=robjects.r("NULL"))
                RMSE = forecast.accuracy(fit2)[0][2]  #RMSE
                if (RMSE < best_RMSE):
                    #print(paste("Reset best_params to (p,d,q) = (", p, ",", d, ",", q , ")", sep = ""))
                    best_RMSE = RMSE
                    best_opts = robjects.BoolVector([mean_opt, drift_opt])
        #print("best params = ", best_params)
        #print("best opts = ", best_opts)
        # Refit with the selected order and options, then forecast.
        fit2 = forecast.Arima(f, order=best_params,
                              xreg=robjects.r("NULL"),
                              include_mean=best_opts[0],
                              include_drift=best_opts[1],
                              biasadj=False,
                              method="ML",
                              model=robjects.r("NULL"))
        # print(forecast.forecast(fit2, ndpat))
        # print(forecast.forecast(fit2, ndpat)[0])
        # print(forecast.forecast(fit2, ndpat)[1])
        # print(forecast.forecast(fit2, ndpat)[2])
        # print(forecast.forecast(fit2, ndpat)[3])
        Ytest[:, col] = forecast.forecast(fit2, ndpat)[3]
    #print(Ytest)
    #print(Xtest.shape) # (78, 57)
    #print(Xtest.shape[0]) # 78
    #print(Ytest.shape) # typically (7, 57)
    # reconstruct aggregated predictions
    for col in range(ycol0, Xtest.shape[1]):
        # NOTE(review): future_starts was filled starting at ycol0, so it
        # is indexed from 0; indexing it with ``col`` only lines up when
        # ycol0 == 0 -- confirm.
        init = future_starts[col]
        for row in range(0, num_predicted_frames - 1):
            tinc = init
            init += Ytest[row, col]
            Ytest[row, col] += tinc
    end = time.time()
    vprint(self.verbose,
           "[+] Success, predictions made in %5.2f sec" % (end - start))
    vprint(self.verbose, "Model :: ======== Predictions finished ========")
    return Ytest
def preprocess(analysis_id):
    """Preprocess the GEO series matrix attached to an Analysis record.

    Steps, as performed below:
      1. Check that the uploaded file starts with ``!Series_title``.
      2. Load it with GEOquery's ``getGEO`` and extract the expression
         matrix, dropping rows containing NA and rows whose sum is 0, and
         write it to ``gem.csv`` in the analysis directory.
      3. Extract the phenotype columns whose names do not match
         'characteristics|date', clean their values, drop uninformative
         columns, relabel rare values as "Other" and write the result to
         ``characteristics.csv``.

    The record's ``status`` is updated and saved along the way.  A failure
    in step 1 or 2 marks preprocessing as failed; a failure in step 3 only
    sets ``char_ok = False`` and the analysis is still marked ready.

    Returns:
        "success", or "invalid gene expression matrix format" on failure.
    """
    a = Analysis.objects.get(id=analysis_id)

    failure_message = "invalid gene expression matrix format"

    def _fail():
        """Record the format failure on the analysis and return the message."""
        a.status = "-2. Preprocessing failed: " + failure_message
        a.save()
        return failure_message

    # Get GEM directory
    gem_dir = os.path.join(settings.MEDIA_ROOT, a.gem.name)
    # Directory to store processed data
    store_dir = os.path.join(
        settings.MEDIA_ROOT,
        'analyses/user_{0}/{1}'.format(a.user.id, a.id))

    ri.initr()
    # Import libraries
    base = importr('base')
    geoquery = importr('GEOquery')

    # Check if file starts with !Series_title, otherwise getGEO never stops
    with open(gem_dir) as f:
        first_line = f.readline()
    if not first_line[:14].startswith("!Series_title\t"):
        return _fail()

    # Get GEO series matrix and extract GEM
    try:
        gsm = geoquery.getGEO(filename=gem_dir, getGPL=False)
    except Exception:
        return _fail()

    try:
        gem = gsm.slots['assayData']['exprs']
        # Remove any genes with NAs (R indices are 1-based, hence the + 1)
        row_keep = ro.IntVector(
            np.argwhere(
                np.array(ro.r.rowSums(ro.r['is.na'](gem))) == 0) + 1)
        gem = gem.rx(row_keep, True)
        # Remove any genes with all 0s
        row_keep = ro.IntVector(
            np.nonzero(np.array(ro.r.rowSums(gem)))[0] + 1)
        gem = gem.rx(row_keep, True)
        # Write to CSV
        ro.r['write.table'](gem, file=os.path.join(store_dir, 'gem.csv'))
    except Exception:
        return _fail()

    try:
        # Get pheno data
        pheno_data = gsm.slots['phenoData']
        pheno_data = pheno_data.slots['data']

        # Extract explicitly defined characteristics
        char_index = ro.r['!'](
            ro.r.grepl('characteristics|date', ro.r.names(pheno_data)))
        char = pheno_data.rx(True, char_index)
        gene_name = ro.r.rownames(char)
        char = ro.r['data.frame'](
            ro.r.lapply(char, ro.r['as.character']),
            stringsAsFactors=False)
        char = ro.r.cbind(char, **{'gene.name': gene_name},
                          stringsAsFactors=False)

        # Data cleaning (the last column, gene.name, is left untouched)
        for i in range(char.nrow):
            for j in range(char.ncol - 1):
                char_val = char.rx(i + 1, j + 1)[0].strip()
                if char_val == "":
                    char.rx[i + 1, j + 1] = "unknown"
                elif char_val in ("None", "NONE", "none"):
                    char.rx[i + 1, j + 1] = "none"
                else:
                    char.rx[i + 1, j + 1] = char_val

        # Remove columns where more than 80% of unique values have less
        # than 5 occurrences
        col_keep = np.repeat([True], char.ncol)
        for i in range(char.ncol - 1):
            unique_count = ro.r.table(char.rx(True, i + 1))
            unique_length = ro.r.length(unique_count)[0]
            if unique_length == 1:
                col_keep[i] = False
                continue
            count = 0
            for j in range(unique_length):
                if ro.r.names(unique_count).rx(j + 1)[0] == "unknown":
                    # "unknown" does not count as a real category
                    unique_length = unique_length - 1
                elif unique_count.rx(j + 1)[0] < 5:
                    count = count + 1
            if count / unique_length > 0.8:
                col_keep[i] = False
        char = char.rx(True, ro.BoolVector(col_keep))

        # Change all values with less than 5 occurences to "Other"
        for i in range(char.ncol - 1):
            rare_char = np.where(
                np.array(ro.r.table(char.rx2(i + 1))) < 5)[0] + 1
            rare_char = ro.r.names(ro.r.table(char.rx2(i + 1))).rx(
                ro.IntVector(rare_char))
            # A single rare value is left as it is.
            if ro.r.length(rare_char)[0] == 1:
                rare_char = ro.IntVector([])
            if ro.r.length(rare_char)[0] != 0:
                rare_values = np.array(rare_char)
                for j in range(char.nrow):
                    if str(char.rx(j + 1, i + 1)[0]) in rare_values:
                        char.rx[j + 1, i + 1] = "Other"

        # Write to CSV
        ro.r['write.table'](
            char, file=os.path.join(store_dir, 'characteristics.csv'))
    except Exception:
        # Characteristics are optional: flag them as unavailable and go on.
        a.char_ok = False
        a.save()

    a.status = "2. Ready for analysis"
    a.save()
    return "success"
# Run the sampler and summarise each parameter by its mean over all walkers
# and steps.
sampler.run_mcmc(starting_guesses, nsteps)
sample = sampler.chain  # shape = (nwalkers, nsteps, ndim)
ests = [np.mean(sample[:, :, j]) for j in range(ndim)]
intercept = ests[0]
slope = ests[1]
# The remaining estimates are one value per data point.
gs = [ests[j + 2] for j in range(len(x))]
# Function-call form works on both Python 2 and Python 3.
print(gs)

# Points whose estimate reaches the cut are flagged as "typical".
cut = min(0.5, np.percentile(gs, 15))
typical = [g >= cut for g in gs]

pdf = ro.DataFrame({'x': ro.FloatVector(x),
                    'y': ro.FloatVector(y),
                    'e': ro.FloatVector(e),
                    'ymin': ro.FloatVector(y - e),
                    'ymax': ro.FloatVector(y + e),
                    'yest': ro.FloatVector(slope * x + intercept),
                    'typical': ro.BoolVector(typical)})
rprint(pdf)

# Data points with error bars plus the fitted line, written to fit.png.
gpf = ggplot2.ggplot(pdf)
ppf = (gpf
       + ggplot2.geom_point(
           ggplot2.aes_string(x='x', y='y',
                              color='typical', shape='typical'),
           size=5)
       + ggplot2.geom_errorbar(
           ggplot2.aes_string(x='x', ymin='ymin', ymax='ymax'))
       + ggplot2.geom_line(ggplot2.aes_string(x='x', y='yest')))
grdevices.png(file="fit.png", width=512, height=512)
print(ppf)
grdevices.dev_off()
def testNewBoolVector(self):
    """A BoolVector built from a list keeps its values and its length."""
    bool_vec = robjects.BoolVector([True, False])
    first = bool_vec[0]
    self.assertEqual(True, first)
    second = bool_vec[1]
    self.assertEqual(False, second)
    size = len(bool_vec)
    self.assertEqual(2, size)
def select_feature_lr_wrapper(n_para, x, y, model_type, fit_intercept=False):
    """Select ``n_para`` features and refit a logistic regression on them.

    Args:
        n_para: number of features to keep ('nr', 'bs') or the maximum
            degrees of freedom allowed along the glmnet path ('vs').
        x: 2-D feature array, one column per feature.
        y: target labels.
        model_type: selection strategy.
            'nr' keeps the features with the largest absolute coefficient
                 in a logistic regression fitted on all features.
            'bs' tries every combination of ``n_para`` features and keeps
                 the one with the lowest training error.
            'vs' runs R's glmnet and keeps the features that are active at
                 the last point of the path with ``df <= n_para``.
        fit_intercept: used by the 'bs' and 'vs' strategies only.

    Returns:
        'nr': ``(coefficients followed by intercept, training error)``.
        'bs' / 'vs': ``(parameters, training error, selected indices)``,
        where the parameters are intercept followed by coefficients when
        ``fit_intercept`` is true and the coefficients alone otherwise.
        Any other ``model_type`` returns None.
    """
    n_f = x.shape[1]

    if model_type == 'nr':
        general_simple = LogisticRegression()
        general_simple.fit(x, y)
        original_model_paras = general_simple.coef_[0]
        # Feature indices ordered by decreasing absolute coefficient.
        index = np.argsort(abs(original_model_paras))[::-1]
        list_of_select_features = index[:n_para]
        new_x = x[:, list_of_select_features]
        lr_refit = LogisticRegression()
        lr_refit.fit(new_x, y)
        return (np.concatenate((lr_refit.coef_[0], lr_refit.intercept_)),
                1 - lr_refit.score(new_x, y))

    if model_type == 'bs':
        min_err = float('inf')
        min_ls_f = None
        for ls_f in combinations(range(n_f), n_para):
            x_sub = x[:, ls_f]
            general_simple = LogisticRegression(fit_intercept=fit_intercept)
            general_simple.fit(x_sub, y)
            err = 1 - general_simple.score(x_sub, y)
            if err < min_err:
                min_err = err
                min_ls_f = ls_f
        x_sub = x[:, min_ls_f]
        lr_refit = LogisticRegression()
        lr_refit.fit(x_sub, y)
        if fit_intercept:
            return (np.concatenate((lr_refit.intercept_, lr_refit.coef_[0])),
                    1 - lr_refit.score(x_sub, y), min_ls_f)
        return lr_refit.coef_[0], 1 - lr_refit.score(x_sub, y), list(min_ls_f)

    if model_type == 'vs':
        import rpy2.robjects as ro
        # Import the module itself: ``import rpy2.robjects as ro`` does not
        # bind the name ``rpy2``, so the converter has to be reached
        # through this import.
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()
        r = ro.r
        r.library("glmnet")
        r_x = ro.r.assign('dummy', x)
        r_y = ro.r.assign('dummy', y)
        r_fit_intercept = ro.BoolVector((fit_intercept,))
        r(
            '''
            var_select<-function(x,y,degree, fit_intercept){
                fit = glmnet(x, y, intercept = fit_intercept)
                df<-fit$df
                index = which(df<=degree)
                index = index[length(index)]
                lambda = fit$lambda[index]
                coefs = coef(fit, s=lambda)
                if(fit_intercept){
                    res = which(abs(coefs)>0)
                }else{
                    res = which(abs(coefs[2:length(coefs)])>0)
                }
                return(res-1)
            }
            '''
        )
        active_ind = np.asarray(
            r.var_select(r_x, r_y, n_para, r_fit_intercept[0])).tolist()
        # Distinct loop name so the data matrix ``x`` is never shadowed.
        active_ind = [int(ind) for ind in active_ind]
        lr_refit = LogisticRegression(fit_intercept=fit_intercept)
        lr_refit.fit(x[:, active_ind], y)
        if fit_intercept:
            return (np.concatenate((lr_refit.intercept_, lr_refit.coef_[0])),
                    1 - lr_refit.score(x[:, active_ind], y), active_ind)
        return (lr_refit.coef_[0],
                1 - lr_refit.score(x[:, active_ind], y), active_ind)
def testNALogical(self):
    """Assigning NA_Logical to an element makes R's is.na report it."""
    bool_vec = robjects.BoolVector((True, False, True))
    bool_vec[0] = robjects.NA_Logical
    is_na = robjects.baseenv['is.na']
    na_flags = is_na(bool_vec)
    self.assertTrue(na_flags[0])
import atddm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pytz
# from datetime import time
from constants import COLORS, TZONES, CODES, BEGDT, ENDDT
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

r = robjects.r
# Length-one R logical vectors, for passing TRUE/FALSE to R functions.
TRUE = robjects.BoolVector([True])
FALSE = robjects.BoolVector([False])
pandas2ri.activate()
# R packages used by this module.
dgof = importr('dgof')
dweib = importr('DiscreteWeibull')


def format_time_interval(t1, t2):
    """Return 'HH:MM--HH:MM' built from the hour and minute of t1 and t2.

    Both arguments only need ``hour`` and ``minute`` attributes; each field
    is zero-padded to two digits.
    """
    return '{h1:02d}:{m1:02d}--{h2:02d}:{m2:02d}'.format(h1=t1.hour,
                                                         m1=t1.minute,
                                                         h2=t2.hour,
                                                         m2=t2.minute)


def formatter_float_n_digits(x, n):
    """Return ``x`` formatted in fixed-point notation with ``n`` decimals."""
    return '{x:.{n}f}'.format(x=x, n=n)
def test_nalogical():
    """Assigning NA_Logical to an element makes R's is.na return True."""
    bool_vec = robjects.BoolVector((True, False, True))
    bool_vec[0] = robjects.NA_Logical
    is_na = robjects.baseenv['is.na']
    first_flag = is_na(bool_vec)[0]
    assert first_flag is True
def test_init_boolvector():
    """A BoolVector built from a list exposes Python bools and its length."""
    bool_vec = robjects.BoolVector([True, False])
    first = bool_vec[0]
    assert first is True
    second = bool_vec[1]
    assert second is False
    size = len(bool_vec)
    assert size == 2