def submit_predictions(self, curs, schema_instance, prediction_pair2instance, cluster_id2properties):
    """
    Fill in the cut-off attributes of every prediction and submit it to the p_gene table.

    The p_gene table named by schema_instance.p_gene_table is created first.
    Then, for each value of prediction_pair2instance, the properties of its
    cluster (looked up by its mcl_id) are used to set:
        p_value_cut_off, avg_p_value  <- cal_hg_p_value() of the gene/go pair
        connectivity_cut_off          <- properties[0]
        unknown_cut_off               <- properties[1]
        cluster_size_cut_off          <- len(properties[2])
    and the instance is handed to MpiPredictionFilter.submit_to_p_gene_table().

    curs: database cursor passed through to every helper.
    schema_instance: object providing the p_gene_table name.
    prediction_pair2instance: dictionary; only its values are used here.
    cluster_id2properties: maps an mcl_id to an indexable with at least 3 items.

    If self.report is true, a running counter is written to stderr every
    2000 predictions and once more after the last one.
    """
    sys.stderr.write("Submitting predictions...\n")
    prediction_filter = MpiPredictionFilter()
    p_gene_table = schema_instance.p_gene_table
    prediction_filter.createGeneTable(curs, p_gene_table)
    no_of_total_genes = get_no_of_total_genes(curs)
    go_no2gene_no_set = get_go_no2gene_no_set(curs)

    counter = 0
    for _prediction_pair, p_attr_instance in prediction_pair2instance.iteritems():
        # 1st fill those empty items
        cluster_properties = cluster_id2properties[p_attr_instance.mcl_id]
        vertex_set = cluster_properties[2]
        # NOTE(review): `r` is not defined inside this method; presumably a
        # module-level object -- confirm it is in scope where this runs.
        hg_p_value = cal_hg_p_value(
            p_attr_instance.gene_no,
            p_attr_instance.go_no,
            vertex_set,
            no_of_total_genes,
            go_no2gene_no_set,
            r,
        )
        p_attr_instance.p_value_cut_off = hg_p_value
        p_attr_instance.avg_p_value = hg_p_value
        p_attr_instance.connectivity_cut_off = cluster_properties[0]
        p_attr_instance.cluster_size_cut_off = len(vertex_set)
        p_attr_instance.unknown_cut_off = cluster_properties[1]
        prediction_filter.submit_to_p_gene_table(curs, p_gene_table, p_attr_instance)

        counter += 1
        if counter % 2000 == 0 and self.report:
            # backspaces rewind the cursor so the counter overwrites itself
            sys.stderr.write("%s%s"%('\x08'*20, counter))

    if self.report:
        sys.stderr.write("%s%s"%('\x08'*20, counter))
    sys.stderr.write("Done.\n")
def run(self):
    """
    Fetch the prediction data, fit and predict with rpart, and record the result.

    Change log:
        11-09-05
        11-09-05 add rpart_cp
        11-10-05 add need_cal_hg_p_value

    Call sequence:
        --db_connect()
        --form_schema_tables()
        --form_schema_tables()
        --get_no_of_total_genes()
        --get_go_no2gene_no_set()
        --data_fetch()
        --get_vertex_list()
        --cal_hg_p_value()
        --rpart_fit_and_predict()
        --MpiPredictionFilter_instance....()
        --record_data()

    The splat, mcl and pattern tables of the new schema are created as views
    of the old schema's tables, a new p_gene table is created, and the
    predictions are recorded into it. "end" is executed on the cursor only
    when self.commit is set.
    """
    conn, curs = db_connect(self.hostname, self.dbname, self.schema)
    source_schema = form_schema_tables(self.fname1)
    target_schema = form_schema_tables(self.fname2)
    gene_total = get_no_of_total_genes(curs)
    go_no2gene_no_set = get_go_no2gene_no_set(curs)

    # NOTE(review): need_cal_hg_p_value is read as a bare (module-level) name,
    # not from self -- confirm that it is defined wherever run() is called.
    prediction_ls, all_data, known_data = self.data_fetch(
        curs,
        source_schema,
        self.filter_type,
        self.is_correct_type,
        gene_total,
        go_no2gene_no_set,
        need_cal_hg_p_value,
    )

    # Disabled rpart_validation call, kept for reference:
    #   testing_acc_ls, training_acc_ls = self.rpart_validation(known_data, self.no_of_buckets, self.rpart_cp,
    #       self.loss_matrix, self.prior_prob)
    #   print testing_acc_ls
    #   print training_acc_ls

    # only the first element is used below; the 2-way unpacking is kept as in the original
    pred, _pred_training = self.rpart_fit_and_predict(
        all_data, known_data, self.rpart_cp, self.loss_matrix, self.prior_prob
    )

    prediction_filter = MpiPredictionFilter()
    for table_attr in ('splat_table', 'mcl_table', 'pattern_table'):
        prediction_filter.view_from_table(
            curs, getattr(source_schema, table_attr), getattr(target_schema, table_attr)
        )
    prediction_filter.createGeneTable(curs, target_schema.p_gene_table)
    self.record_data(curs, prediction_filter, prediction_ls, pred, target_schema)

    if self.commit:
        curs.execute("end")
def run(self):
    """
    Fetch known/unknown prediction data, fit the selected model, predict, and record.

    Change log:
        11-09-05
        11-09-05 add rpart_cp
        11-10-05 add need_cal_hg_p_value
        11-23-05 rpart_fit_and_predict() is split
        2006-12-05 add need_output_data_for_R flag

    Call sequence:
        --db_connect()
        --form_schema_tables()
        --form_schema_tables()
        --get_no_of_total_genes()
        --get_go_no2gene_no_set()
        --data_fetch()
        --get_vertex_list()
        --cal_hg_p_value()
        --output_data_for_R()
        --rpart_fit()
        --rpart_predict()
        --rpart_predict()
        --MpiPredictionFilter_instance....()
        --record_data()

    The fit and predict functions are chosen from self.fit_function_dict and
    self.predict_function_dict by self.type. Nothing is written to the
    database unless self.commit is set; in that case the new schema's views
    and p_gene table are created, both prediction lists are recorded and
    "end" is executed on the cursor.
    """
    conn, curs = db_connect(self.hostname, self.dbname, self.schema)
    source_schema = form_schema_tables(self.fname1)
    target_schema = form_schema_tables(self.fname2)
    gene_total = get_no_of_total_genes(curs)
    go_no2gene_no_set = get_go_no2gene_no_set(curs)

    # NOTE(review): need_cal_hg_p_value is read as a bare (module-level) name,
    # not from self -- confirm that it is defined wherever run() is called.
    fetched = self.data_fetch(
        curs,
        source_schema,
        self.filter_type,
        self.is_correct_type,
        gene_total,
        go_no2gene_no_set,
        need_cal_hg_p_value,
    )
    unknown_prediction_ls, known_prediction_ls, unknown_data, known_data = fetched

    if self.need_output_data_for_R:
        # 2006-12-05
        for data_matrix, suffix_format in ((known_data, "%s.known"), (unknown_data, "%s.unknown")):
            self.output_data_for_R(data_matrix, suffix_format % self.fname1)

    # Disabled rpart_validation call, kept for reference:
    #   testing_acc_ls, training_acc_ls = self.rpart_validation(known_data, self.training_perc, self.rpart_cp,
    #       self.loss_matrix, self.prior_prob)
    #   print testing_acc_ls
    #   print training_acc_ls

    fit_model = self.fit_function_dict[self.type](
        known_data, self.parameter_list_dict[self.type], self.bit_string
    )
    # looked up only after fitting, in the same order as the original code
    predict = self.predict_function_dict[self.type]
    known_pred = predict(fit_model, known_data)
    unknown_pred = predict(fit_model, unknown_data)

    if self.debug:
        # print(x) with a single expression behaves exactly like the
        # Python 2 statement `print x`
        if self.type == 2:
            # randomForest's model has its own oob prediction
            oob_pred = fit_model.as_py(BASIC_CONVERSION)["predicted"]
            print(self.cal_accuracy(known_data, oob_pred, pred_type=1))
        print(self.cal_accuracy(known_data, known_pred, pred_type=self.type))
        print(self.cal_accuracy(unknown_data, unknown_pred, pred_type=self.type))

    if not self.commit:
        return

    prediction_filter = MpiPredictionFilter()
    for table_attr in ("splat_table", "mcl_table", "pattern_table"):
        prediction_filter.view_from_table(
            curs, getattr(source_schema, table_attr), getattr(target_schema, table_attr)
        )
    prediction_filter.createGeneTable(curs, target_schema.p_gene_table)

    self.record_data(
        curs,
        prediction_filter,
        unknown_prediction_ls,
        unknown_pred,
        target_schema,
        pred_type=self.type,
    )

    if self.type == 2:
        # 2006-10-31 randomForest's model has its own oob prediction,
        # but use rpart's way of storing prediction
        known_pred = fit_model.as_py(BASIC_CONVERSION)["predicted"]
        known_pred_type = 1
    else:
        known_pred_type = self.type
    self.record_data(
        curs,
        prediction_filter,
        known_prediction_ls,
        known_pred,
        target_schema,
        pred_type=known_pred_type,
    )
    curs.execute("end")