Example #1
0
# Keep only the columns that are not listed in cat_vars; this drops the
# original categorical columns (e.g. the previous Embarked column).
data_vars = data.columns.values.tolist()
to_keep = [i for i in data_vars if i not in cat_vars]
data = data[to_keep].copy()

print(data.dtypes)
# Split into the predictor matrix X and the target vector y.
X = data.drop(columns=['Survived'])
y = data['Survived']
# Fit the model once with an explicit solver. max_iter=1000 raises the cap on
# solver iterations so lbfgs has room to converge. (A previous fit with
# default settings was removed: its result was overwritten immediately.)
logRegM = LogReg(random_state=0, solver='lbfgs', max_iter=1000).fit(X, y)

# Interactive exploration: these bare expressions only display a value when
# run in an interactive session/cell; as a plain script they print nothing.
# What is this object?
type(logRegM)
# What can it do? List the attributes and methods available on the model.
dir(logRegM)

# Training-set predictions, i.e. predictions on the same data used for the fit.
z = logRegM.predict(X)
from sklearn.metrics import confusion_matrix as cm
# confusion_matrix takes (y_true, y_pred).
cm(y, z)  # not that great...
# Better print-out (requires fn_confusionMatrixInfo to be loaded).
# NOTE(review): the arguments here are (predicted, actual), the reverse of the
# cm() call above - confirm against the helper's signature.
confusionMatrixInfo(z, y)
# TODO: package this into a function that tries various combinations
# of predictors.

#%% ------- R version of the code (for reference) ---------
# training error
# logreg.probs <- predict(logreg, type='response')
# tr.preds <- as.integer(logreg.probs > 0.5) # this is a cutoff (arbitrarily chosen by me !!!)

# install.packages("caret")