# Build a single hand-made observation ("pineapple") laid out like the
# standardized training design matrix: categorical indicator columns first,
# the four numeric columns last.
pineapple_num = pd.DataFrame(
    {"Age": 12, "Fee": 0, "VideoAmt": 0, "PhotoAmt": 1}, index=[0]
)
# Standardize the numeric features with the existing scaler
# (assumes `scaler` was fitted on these four columns in this order -- TODO confirm).
pineapple_stand = pd.DataFrame(scaler.transform(pineapple_num))
pineapple_stand.columns = pineapple_num.columns
# 19 indicator values, named after every training column except the last four.
pineapple_cat = np.array(
    [1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
).reshape(-1, 19)
pineapple_cat_df = pd.DataFrame(pineapple_cat)
pineapple_cat_df.columns = x_train_stand.columns[:-4]
# Join the *standardized* numerics. The previous version concatenated the raw
# `pineapple_num`, which left `pineapple_stand` unused and put the sample on a
# different scale from `x_train_stand`, the matrix the model is fitted on.
pineapple = pd.concat([pineapple_cat_df, pineapple_stand], axis=1)

#%% initial model building (LASSO logistic regression)
logistic_regression = Logit(y_train.values, x_train_stand)

# Sweep the regularization weight and record the test-set AUC for each value.
alpha = np.linspace(0, 1000, 101)
auc = []
for a in alpha:
    rslt = logistic_regression.fit_regularized(alpha=a, disp=False)
    prediction = rslt.predict(exog=x_test_stand)
    auc.append(roc_auc_score(y_test, prediction))
auc = np.array(auc)

# 0 alpha gives the best auc, therefore we can use the regular logistic regression
logistic_result = logistic_regression.fit()
logistic_prediction = logistic_result.predict(exog=x_test_stand)
logistic_result.summary()
auc_score = round(roc_auc_score(y_test, logistic_prediction), 2)

#%% ROC curve
def ROC(true, prediction, model):
    """Compute the ROC curve points for `prediction` against `true`.

    Parameters
    ----------
    true : array-like
        Ground-truth labels, passed to `roc_curve` as `y_true`.
    prediction : array-like
        Predicted scores, passed to `roc_curve` as `y_score`.
    model
        Not used by the statements visible here.

    NOTE(review): this function appears to be cut off in this section of the
    file; only the statements below are visible, so nothing is returned here.
    """
    # Local alias kept in case later statements of this function rely on it.
    y_test = true
    fpr, tpr, t = roc_curve(y_true=y_test, y_score=prediction)
# Target column; the column selection further down expects the labels
# "YES" and "NO".
y = data["Adopted"]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=55
)
# NOTE(review): the split above uses the raw `Adopted` values. If they are
# "YES"/"NO" strings, `Logit` below needs a numeric endog -- confirm the
# column is numeric or encode it before splitting.

# Two-column (YES, NO) float encoding of the full target, as used by the
# commented-out binomial GLM below.
label_encoder = preprocessing.LabelEncoder()
y = label_encoder.fit_transform(y)
y = pd.get_dummies(y, prefix_sep="_", prefix=None)
# get_dummies on the integer codes names its columns 0, 1, ...; restore the
# original class labels so the selection by "YES"/"NO" does not raise KeyError.
y.columns = label_encoder.classes_
y = y[["YES", "NO"]].values.astype(float)

# logit = sm.GLM(y, x, family = sm.families.Binomial())
# Lasso_results = logit.fit_regularized(alpha = 100, L1_wt = 1)

# Regularization path: refit the logistic model for each penalty weight and
# collect the coefficient vectors (one row per alpha).
alpha = np.linspace(0, 100, 101)
model = Logit(y_train, x_train)
params = []
for a in alpha:
    rslt = model.fit_regularized(alpha=a, disp=False)
    params.append(rslt.params)
params = np.asarray(params)

plt.figure(figsize=(10, 5))
plt.clf()
plt.axes([0.1, 0.1, 0.67, 0.8])
# The original loop header was left unfinished (`for k in range()`); it is
# completed here as one coefficient-path line per model parameter, with the
# line handles collected in `ag`.
# NOTE(review): confirm this matches the intended plot.
ag = []
for k in range(params.shape[1]):
    ag.extend(plt.plot(alpha, params[:, k]))

# Refit at a single, strong penalty and predict on the held-out set.
model = Logit(y_train, x_train)
rslt1 = model.fit_regularized(alpha=100, disp=False)
rslt1.summary()
prediction = rslt1.predict(exog=x_test)
x1 = x.values