def randomForest_predict(self, fit_model, data):
    """
    Predict with a fitted randomForest model via R's predict().

    History:
        03-17-06
        2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off

    Arguments:
        fit_model -- model object handed unchanged to R's predict();
            presumably the value returned by randomForest_fit()
            (verify against the caller).
        data -- row-wise 2D data accepted by array(). Columns 0-6 are
            p_value, recurrence, connectivity, cluster_size, gradient,
            avg_degree and unknown_ratio; the LAST column is is_correct.

    Returns:
        The result of r.predict(), obtained under BASIC_CONVERSION.
    """
    if self.debug:
        sys.stderr.write("Predicting by randomForest...\n")
    data = array(data)

    # Column index -> R data-frame column name.
    column_names = (
        "p_value",
        "recurrence",
        "connectivity",
        "cluster_size",
        "gradient",
        "avg_degree",
        "unknown_ratio",
    )
    # The data frame is built under NO_CONVERSION and the mode is switched
    # back to BASIC_CONVERSION before predict() is called. The try/finally
    # guarantees the switch back even when building the data frame fails,
    # so an error here cannot leave the global rpy mode at NO_CONVERSION.
    set_default_mode(NO_CONVERSION)
    try:
        columns = dict(
            (name, data[:, index]) for index, name in enumerate(column_names)
        )
        # The class label is always read from the last column.
        columns["is_correct"] = r.factor(data[:, -1])
        data_frame = r.as_data_frame(columns)
    finally:
        set_default_mode(BASIC_CONVERSION)

    pred = r.predict(fit_model, data_frame)
    del data_frame
    if self.debug:
        sys.stderr.write("Done randomForest prediction.\n")
    return pred
def randomForest_fit(self, known_data, parameter_list, bit_string="1111111"):
    """
    Fit an R randomForest classifier of is_correct on the selected predictors.

    History:
        03-17-06
        2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off

    Arguments:
        known_data -- row-wise 2D training data accepted by array().
            Columns 0-6 are p_value, recurrence, connectivity,
            cluster_size, gradient, avg_degree and unknown_ratio; the LAST
            column is is_correct and is turned into an R factor.
        parameter_list -- only element 0 is used: the value for
            randomForest's 'mtry' argument. A value <= 0 means "do not pass
            mtry", i.e. let randomForest use its own default.
        bit_string -- one character per predictor, in the column order
            above; a predictor is included in the formula when its
            character is "1".

    Returns:
        The object returned by r.randomForest().
    """
    if self.debug:
        sys.stderr.write("Fitting randomForest...\n")
    mty = parameter_list[0]
    from rpy import r

    # better than r.library("randomForest", lib_loc=os.path.join(lib_path, "R"))
    # (see plone doc)
    r._libPaths(os.path.join(lib_path, "R"))
    r.library("randomForest")

    coeff_name_list = [
        "p_value",
        "recurrence",
        "connectivity",
        "cluster_size",
        "gradient",
        "avg_degree",
        "unknown_ratio",
    ]  # 2006-10-30
    # Keep only the predictors whose bit is "1". Indexing (rather than zip)
    # keeps the IndexError for a "1" beyond the known predictors.
    formula_list = [
        coeff_name_list[i] for i, bit in enumerate(bit_string) if bit == "1"
    ]
    formula = r("is_correct~%s" % "+".join(formula_list))

    known_data = array(known_data)
    # NOTE(review): the default conversion mode is left at NO_CONVERSION when
    # this method returns (the original never switched it back). It is kept
    # that way here because callers may rely on it -- confirm.
    set_default_mode(NO_CONVERSION)
    data_frame = r.as_data_frame(
        {
            "p_value": known_data[:, 0],
            "recurrence": known_data[:, 1],
            "connectivity": known_data[:, 2],
            "cluster_size": known_data[:, 3],
            "gradient": known_data[:, 4],
            "avg_degree": known_data[:, 5],
            "unknown_ratio": known_data[:, 6],
            "is_correct": r.factor(known_data[:, -1]),
        }
    )  # 03-17-06, watch r.factor #2006-10-30
    if mty > 0:
        # The R argument is 'mtry'. The previous keyword 'mty' is not a
        # prefix of 'mtry', so R did not match it and the value was ignored.
        fit = r.randomForest(formula, data=data_frame, mtry=mty)
    else:
        fit = r.randomForest(formula, data=data_frame)
    del data_frame
    if self.debug:
        sys.stderr.write("Done fitting randomForest.\n")
    return fit