def fit_with_logistic(self, threshold=0.5):
    """Fit a binomial GLM on the training set and tabulate test-set results.

    Builds the formula ``<y_col>~<x1>+<x2>+...`` from ``self.y_col`` and
    ``self.x_cols``, fits it on ``self.train_set``, predicts on
    ``self.test_set`` and hands the predicted probabilities plus the
    0/1-encoded observed labels to ``tp.output_table_with_prob``.

    Args:
        threshold: Forwarded unchanged to ``tp.output_table_with_prob``.
            Presumably the probability cut-off used for classification;
            confirm against that helper.
    """
    def encode_label(label):
        # 'No' is coded as 1 and every other label as 0, matching the
        # ["Yes", "No"] (zero text, one text) ordering passed below.
        if label == 'No':
            return 1
        return 0

    predictors = "+".join(self.x_cols)
    formula = "%s~%s" % (self.y_col, predictors)

    glm_model = smf.glm(
        formula,
        data=self.train_set,
        family=sm.families.Binomial(),
    )
    fitted = glm_model.fit()

    predict_probs = fitted.predict(exog=self.test_set)
    observed = self.test_set[self.y_col]
    real_values = observed.map(encode_label)

    tp.output_table_with_prob(
        predict_probs,
        real_values,
        threshold=threshold,
        zero_one_col_texts=["Yes", "No"],
    )
def output_binary_table(self, res, predict_probs, real_values, glm_fit=True):
    """Print a header line and delegate the table to ``tp.output_table_with_prob``.

    Args:
        res: Fitted result object. Only used when ``glm_fit`` is true, where
            it is passed to ``self.get_real_zero_one_columns``.
        predict_probs: Predicted probabilities, forwarded unchanged.
        real_values: Observed outcomes, forwarded unchanged.
        glm_fit: When true, the labels for the two classes are taken from
            ``self.get_real_zero_one_columns(res)``; otherwise the plain
            ``[0, 1]`` labels are used.
    """
    header = "predict real"
    # Parenthesised single-argument form prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print(header)

    if glm_fit:
        zero_one_columns = self.get_real_zero_one_columns(res)
    else:
        # If the y column was transformed to 0-1 in advance, the model's
        # endog_names would be one variable, not a list, so there are no
        # label names to recover and the raw 0/1 codes are used instead.
        zero_one_columns = [0, 1]

    tp.output_table_with_prob(
        predict_probs,
        real_values,
        zero_one_col_texts=zero_one_columns,
    )