コード例 #1
0
from sklearn.preprocessing import LabelEncoder
# Cut the 'hp' column into 5 equal-width bins and encode each bin as an
# integer label so it can be fed to the GAM as a numeric feature.
encoder = LabelEncoder()
d['h_bin'] = encoder.fit_transform(pd.cut(d['hp'], 5))

# Single source of truth for the predictor columns used to fit and to predict.
gam_features = ['disp', 'wt', 'vs', 'h_bin']

# Fit a linear GAM of 'mpg' on the selected predictors.
gam_model = LinearGAM().fit(d[gam_features], d['mpg'])
# summary() prints its report itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table.
gam_model.summary()

# Mean squared error on the same rows the model was fitted on.
gam_predictions = gam_model.predict(d[gam_features])
gam_mse = np.mean((gam_predictions - d['mpg'])**2)
print('MSE:', gam_mse)

#Performing classification (logistic regression) with the GAM
# Build a two-class target from 'mpg' using the bins (0, 20] and (20, 100]
# (pd.cut intervals are right-closed by default), encoded as integer labels.
d['mpg_bin'] = encoder.fit_transform(pd.cut(d['mpg'], [0, 20, 100]))

# Single source of truth for the predictor columns used to fit and to score.
gam_features = ['disp', 'wt', 'vs', 'h_bin']

# gridsearch() fits the model while searching over its hyper-parameters and
# returns the fitted estimator.
gam_model = LogisticGAM().gridsearch(d[gam_features], d['mpg_bin'])
# summary() prints its report itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table.
gam_model.summary()
# Accuracy is computed on the same rows the model was fitted on.
print('Classification Accuracy:',
      gam_model.accuracy(d[gam_features], d['mpg_bin']))

#This models the conditional probabilities for mpg being <= 20 and > 20 (pd.cut bins are right-closed)
#Note the y-axis of these plots is the logit
'''
-------------------------------------------------------------------------------
-------------------Classification and Regression Trees-------------------------
-------------------------------------------------------------------------------
'''

#Regression tree
# Predictors are every column of d except the regression target and its
# binned counterpart; build the frame once and reuse it below.
tree_features = d.drop(['mpg', 'mpg_bin'], axis=1)

# The split criterion is left at its default (squared error). The explicit
# spelling 'mse' was deprecated in scikit-learn 1.0 and removed in 1.2, so
# relying on the default keeps this working on both old and new releases.
reg_tree = DecisionTreeRegressor(min_samples_split=20).fit(
    tree_features, d['mpg'])

# get_params is a method: call it to print the parameter dict rather than
# the bound-method object.
print(reg_tree.get_params())

# Report the fitted importance of each predictor, in column order.
for name, importance in zip(tree_features.columns,
                            reg_tree.feature_importances_):
    print('Importance of', name, importance)
コード例 #2
0
# Recode the diagnosis labels as a binary variable: 'M' -> 1, then 'B' -> 0.
for label, code in (('M', 1), ('B', 0)):
    tumors.loc[tumors['diagnosis'] == label, 'diagnosis'] = code

# Independent variables: the first 11 columns of the frame with the target
# column removed, as a plain array.
leading_columns = tumors.iloc[:, :11]
tumors_X = leading_columns.drop(['diagnosis'], axis=1).values

# Dependent variable: the recoded diagnosis column.
tumors_y = tumors['diagnosis']

# In[22]:

# Logistic GAM with 20 splines; gridsearch() fits it while searching over
# hyper-parameters and hands back the fitted estimator.
gam = LogisticGAM(n_splines=20)
gam = gam.gridsearch(tumors_X, tumors_y)
gam.summary()

# In[23]:

# Accuracy on the fitting data, expressed as a percentage with two decimals.
accuracy_pct = round(gam.accuracy(tumors_X, tumors_y) * 100, 2)
print('La precisión del módelo es:', accuracy_pct, "%")

# In[24]:

# Column labels at positions 1-10 of the tumors frame. Not referenced in the
# visible part of the loop below (presumably used for subplot titles further
# down — TODO confirm).
titles = tumors.columns[1:11]
# NOTE(review): plt.subplots() on the next line creates its own figure, so
# this plt.figure() call appears to open an extra, unused figure — confirm.
plt.figure()
# One row of 10 axes; the loop below draws term i of the GAM on axis i.
fig, axs = plt.subplots(1, 10, figsize=(40, 8))
for i, ax in enumerate(axs):
    # Grid of input values for term i, used as the x-axis of this panel.
    XX = gam.generate_X_grid(term=i)
    # Partial dependence of term i over that grid.
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    # Called with width=.95 the result is indexed with [1]; presumably that
    # element holds the 95% confidence bounds (see pygam docs — TODO confirm).
    # They are drawn as dashed blue lines.
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='b',
            ls='--')
    if i == 0:
        # Only the first panel gets a fixed y-range.
        ax.set_ylim(-30, 30)