def rpart_predict(self, fit_model, data):
    """
    Run R's predict() on `data` with an already fitted model.

    11-23-05 split from rpart_fit_and_predict()

    fit_model -- fitted R model object handed straight to r.predict()
                 (presumably an rpart fit, going by the method name).
    data      -- 2-D row data; it is converted with array() and sliced by
                 column. Columns 0-4 are sent to R as p_value, recurrence,
                 connectivity, cluster_size and gradient; the last column
                 is sent as is_correct.

    Returns whatever r.predict(..., type=["class"]) yields under rpy's
    BASIC_CONVERSION mode.
    """
    if self.debug:
        sys.stderr.write("Doing rpart_predict...\n")
    data = array(data)
    # The data frame has to stay an R object, so conversion is switched off
    # while it is built.  The mode is process-wide rpy state, hence the
    # try/finally: a failure while slicing columns or inside R must not
    # leave every later rpy call running in NO_CONVERSION mode.
    set_default_mode(NO_CONVERSION)
    try:
        data_frame = r.as_data_frame(
            {
                "p_value": data[:, 0],
                "recurrence": data[:, 1],
                "connectivity": data[:, 2],
                "cluster_size": data[:, 3],
                "gradient": data[:, 4],
                "is_correct": data[:, -1],
            }
        )
    finally:
        set_default_mode(BASIC_CONVERSION)
    # 11-17-05 type=c("class")
    pred = r.predict(fit_model, data_frame, type=["class"])
    # Drop the reference to the R-side data frame as soon as it is unused.
    del data_frame
    if self.debug:
        sys.stderr.write("Done rpart_predict.\n")
    return pred
def randomForest_predict(self, fit_model, data):
    """
    Run R's predict() on `data` with an already fitted model.

    03-17-06
    2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off

    fit_model -- fitted R model object handed straight to r.predict()
                 (presumably a randomForest fit, going by the method name).
    data      -- 2-D row data; it is converted with array() and sliced by
                 column. Columns 0-6 are sent to R as p_value, recurrence,
                 connectivity, cluster_size, gradient, avg_degree and
                 unknown_ratio; the last column is wrapped in r.factor()
                 and sent as is_correct.

    Returns whatever r.predict() yields under rpy's BASIC_CONVERSION mode.
    """
    if self.debug:
        sys.stderr.write("Predicting by randomForest...\n")
    data = array(data)
    # Both r.factor() and r.as_data_frame() must hand back R objects, so
    # conversion is switched off while the frame is built.  The mode is
    # process-wide rpy state; try/finally guarantees it is put back even
    # when column slicing or the R call raises.
    set_default_mode(NO_CONVERSION)
    try:
        data_frame = r.as_data_frame(
            {
                "p_value": data[:, 0],
                "recurrence": data[:, 1],
                "connectivity": data[:, 2],
                "cluster_size": data[:, 3],
                "gradient": data[:, 4],
                "avg_degree": data[:, 5],
                "unknown_ratio": data[:, 6],
                "is_correct": r.factor(data[:, -1]),
            }
        )
    finally:
        set_default_mode(BASIC_CONVERSION)
    pred = r.predict(fit_model, data_frame)
    # Drop the reference to the R-side data frame as soon as it is unused.
    del data_frame
    if self.debug:
        sys.stderr.write("Done randomForest prediction.\n")
    return pred
def rpart_fit_and_predict(self, all_data, known_data, rpart_cp, loss_matrix, prior_prob, bit_string='11111'):
    """
    Fit an rpart classification tree on known_data, then predict with it.

    11-09-05
        1st use known_data to get the fit model
        2nd use the fit model to do prediction on all_data
    11-09-05 add rpart_cp
    11-17-05 add loss_matrix, prior_prob
        return two pred
    11-17-05 predictions are requested with type=c("class")

    all_data, known_data -- 2-D row data; each is converted with array()
        and sliced by column. Columns 0-4 are sent to R as p_value,
        recurrence, connectivity, cluster_size and gradient; the last
        column is sent as is_correct (the response of the formula).
    rpart_cp    -- passed to rpart.control(cp=...).
    loss_matrix -- wrapped in r.matrix() and passed as parms$loss.
    prior_prob  -- if truthy, parms$prior is set to
                   [prior_prob, 1 - prior_prob]; otherwise no prior is
                   passed.
    bit_string  -- one character per predictor, in the column order above;
                   a '1' puts that predictor into the model formula.

    Returns (pred, pred_training): the r.predict() results for all_data and
    for known_data respectively, as converted by BASIC_CONVERSION.
    """
    sys.stderr.write("rpart fitting and predicting...\n")
    r.library("rpart")

    coeff_name_list = ['p_value', 'recurrence', 'connectivity', 'cluster_size', 'gradient']
    # Indexing (rather than zip) is deliberate: a '1' beyond the known
    # predictors still raises IndexError instead of being silently ignored.
    formula_list = [coeff_name_list[i] for i, bit in enumerate(bit_string) if bit == '1']

    # 11-17-05 transform into array
    all_data = array(all_data)
    known_data = array(known_data)

    def build_columns(data):
        # Column name -> column slice mapping shared by both data frames.
        return {
            "p_value": data[:, 0],
            "recurrence": data[:, 1],
            "connectivity": data[:, 2],
            "cluster_size": data[:, 3],
            "gradient": data[:, 4],
            "is_correct": data[:, -1],
        }

    # The data frame, the control/parms objects and the fit itself must all
    # stay R objects, so conversion is off for this whole section.  The mode
    # is process-wide rpy state; try/finally restores it on any failure.
    set_default_mode(NO_CONVERSION)
    try:
        data_frame = r.as_data_frame(build_columns(known_data))
        prior_kwargs = {}
        if prior_prob:
            # get the full list
            prior_kwargs["prior"] = [prior_prob, 1 - prior_prob]
        fit = r.rpart(
            r("is_correct~%s" % '+'.join(formula_list)),
            data=data_frame,
            method="class",
            control=r.rpart_control(cp=rpart_cp),
            parms=r.list(loss=r.matrix(loss_matrix), **prior_kwargs)
        )
    finally:
        set_default_mode(BASIC_CONVERSION)
    pred_training = r.predict(fit, data_frame, type=["class"])
    del data_frame

    set_default_mode(NO_CONVERSION)
    try:
        all_data_frame = r.as_data_frame(build_columns(all_data))
    finally:
        set_default_mode(BASIC_CONVERSION)
    # 11-17-05 type=c("class")
    pred = r.predict(fit, all_data_frame, type=["class"])
    del all_data_frame

    sys.stderr.write("Done rpart fitting and predicting.\n")
    return pred, pred_training
def __call__(self, x, se=False):
    """
    Evaluate the stored smoother at new points via R's predict().

    x  - new values (must be same dimensions as input; check self.nvar)
    se - when true, also return the standard errors (default False)

    Returns an array of predictions, or a (fit, se.fit) pair of arrays
    when `se` is true.
    """
    prediction = r.predict(self.smoother, x, se=se)
    if not se:
        return nx.asarray(prediction)
    # With se requested, the result is indexed by 'fit' and 'se.fit'.
    fitted = nx.asarray(prediction['fit'])
    std_err = nx.asarray(prediction['se.fit'])
    return fitted, std_err
def estimate_pi0(self, lambda_list, pi0_list): """ 01-19-06 Storey2003, (natural) cubic spline, df=3 """ sys.stderr.write("Estimating pi0...\n") rpy.set_default_mode(rpy.NO_CONVERSION) s = r.smooth_spline(lambda_list, pi0_list, df=3) rpy.set_default_mode(rpy.BASIC_CONVERSION) estimated_pi0 = r.predict(s, 1)['y'] print "\t estimated_pi0:", estimated_pi0 sys.stderr.write("Done.\n") return estimated_pi0
def estimate_pi0(self, lambda_list, pi0_list): """ 01-19-06 Storey2003, (natural) cubic spline, df=3 """ sys.stderr.write("Estimating pi0...\n") rpy.set_default_mode(rpy.NO_CONVERSION) s = r.smooth_spline(lambda_list, pi0_list, df=3) rpy.set_default_mode(rpy.BASIC_CONVERSION) estimated_pi0 = r.predict(s,1)['y'] print "\t estimated_pi0:", estimated_pi0 sys.stderr.write("Done.\n") return estimated_pi0
def fitPoly(xarray, yarray, order):
    """
    Fit y ~ poly(x, order) with R's lm and return the fitted values.

    xarray, yarray - x and y observations (each is copied into a list)
    order          - polynomial degree substituted into the model formula

    Returns (xl, predvals): the x values as a list and the model's
    predictions for them, in the same order.
    """
    # lm has to return an R object so that it can be passed on to predict().
    r.lm.local_mode(rpy.NO_CONVERSION)
    xl = list(xarray)
    yl = list(yarray)

    formula = r("y ~ poly(x,%d)" % order)
    frame = r.data_frame(x=xl, y=yl)
    model = r.lm(formula, data=frame)

    # pred is a dict with keys from '1' to 'N', where N is the size of xl
    pred = r.predict(model)
    predvals = [pred[str(row)] for row in range(1, len(xl) + 1)]
    return xl, predvals