# Build a one-row feature frame for a single hypothetical animal ("pineapple")
# laid out like x_train_stand: 19 categorical indicator columns followed by
# the 4 numeric columns.
# NOTE(review): assumes `scaler` was fitted earlier on these four numeric
# columns in this order (Age, Fee, VideoAmt, PhotoAmt) -- confirm.
pineapple_num = pd.DataFrame({"Age": 12, "Fee": 0, "VideoAmt": 0, "PhotoAmt": 1}, index=[0])
pineapple_stand = pd.DataFrame(scaler.transform(pineapple_num), columns=pineapple_num.columns)

# Categorical indicators, reshaped to a single row of 19 values and named after
# every x_train_stand column except the last four.
pineapple_cat = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]).reshape(-1, 19)
pineapple_cat_df = pd.DataFrame(pineapple_cat, columns=x_train_stand.columns[:-4])

# Use the *standardised* numeric values: the original concatenated the raw
# pineapple_num, leaving pineapple_stand unused and putting this row on a
# different scale from x_train_stand.
pineapple = pd.concat([pineapple_cat_df, pineapple_stand], axis=1)

#%% initial model building (LASSO logistic regression)
# Sweep the penalty weight over an evenly spaced grid: refit the logit on
# x_train_stand for each value and record the AUC it reaches on the test split.
logistic_regression = Logit(y_train.values, x_train_stand)
alpha = np.linspace(0, 1000, 101)
auc = []
for a in alpha:
    rslt = logistic_regression.fit_regularized(alpha=a, disp=False)
    prediction = rslt.predict(x_test_stand)
    auc += [roc_auc_score(y_test, prediction)]
auc = np.array(auc)

# Per the author's sweep, alpha = 0 gave the best AUC, so the final model is
# the plain (unpenalised) logistic regression.
logistic_result = logistic_regression.fit()
logistic_prediction = logistic_result.predict(x_test_stand)
logistic_result.summary()
auc_score = round(roc_auc_score(y_test, logistic_prediction), 2)

#%% ROC curve
def ROC(true, prediction, model):
    y_test = true
    prediction = prediction
    fpr, tpr, t = roc_curve(y_true = y_test, y_score = prediction)
# Example #2
# 0
# Target column; the train/test split is made on these raw labels.
y = data["Adopted"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=55)


# Re-express the target as a two-column float indicator matrix ordered
# [YES, NO].
# NOTE(review): this runs after the split, so y_train / y_test above keep the
# raw labels -- confirm that is intended.
label_encoder = preprocessing.LabelEncoder()
y = label_encoder.fit_transform(y)
y = pd.get_dummies(y, prefix_sep="_", prefix=None)
# get_dummies names its columns after the integer codes (0, 1, ...), so the
# "YES"/"NO" lookup below used to raise KeyError. Restore the class labels;
# the sorted code order matches label_encoder.classes_.
y.columns = label_encoder.classes_
y = y[["YES", "NO"]].values.astype(float)

# GLM-based alternative, kept for reference:
# logit = sm.GLM(y, x, family = sm.families.Binomial())
# Lasso_results = logit.fit_regularized(alpha = 100, L1_wt = 1)

# Regularisation path: refit the logit once per penalty weight on the grid and
# stack the fitted coefficients, one row per alpha value.
alpha = np.linspace(0, 100, 101)
model = Logit(y_train, x_train)
params = []
for a in alpha:
    rslt = model.fit_regularized(alpha=a, disp=False)
    params += [rslt.params]
params = np.asarray(params)

# Plot the regularisation path: one line per coefficient across the alpha grid.
plt.figure(figsize=(10, 5))
plt.clf()
# Axes rectangle is [left, bottom, width, height] in figure fractions; the
# right-hand part of the figure is left empty (presumably for a legend).
plt.axes([0.1, 0.1, 0.67, 0.8])
ag = []  # line handles, one per coefficient column of params
# The original loop header was cut off (`for k in range()`), which is a syntax
# error. Completed here to iterate over the coefficient columns.
for k in range(params.shape[1]):
    ag.extend(plt.plot(alpha, params[:, k], "-", lw=2, alpha=0.6))

# Refit with one fixed penalty weight (alpha = 100) and predict for x_test.
model = Logit(y_train, x_train)
rslt1 = model.fit_regularized(alpha=100, disp=False)
rslt1.summary()
prediction = rslt1.predict(x_test)

# `.values` of the full feature set x.
x1 = x.values