# Bin horsepower into 5 equal-width intervals and label-encode the bins so the
# GAM can treat horsepower as an ordinal factor.
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
d['h_bin'] = encoder.fit_transform(pd.cut(d['hp'], 5))

# ----- Regression with a GAM -----
gam_model = LinearGAM().fit(d[['disp', 'wt', 'vs', 'h_bin']], d['mpg'])
# FIX: pygam's summary() prints its report itself and returns None, so the
# original print(gam_model.summary()) emitted a spurious "None" line.
gam_model.summary()

gam_predictions = gam_model.predict(d[['disp', 'wt', 'vs', 'h_bin']])
gam_mse = np.mean((gam_predictions - d['mpg'])**2)
print('MSE:', gam_mse)

# Performing classification (logistic regression) with the GAM.
# mpg is split at 20 into two classes, then label-encoded to {0, 1}.
d['mpg_bin'] = encoder.fit_transform(pd.cut(d['mpg'], [0, 20, 100]))
gam_model = LogisticGAM().gridsearch(d[['disp', 'wt', 'vs', 'h_bin']],
                                     d['mpg_bin'])
gam_model.summary()
print('Classification Accuracy:',
      gam_model.accuracy(d[['disp', 'wt', 'vs', 'h_bin']], d['mpg_bin']))
# This models the conditional probabilities for mpg being < 20 and >= 20.
# Note the y-axis of these plots is the logit.

'''
-------------------------------------------------------------------------------
-------------------Classification and Regression Trees-------------------------
-------------------------------------------------------------------------------
'''
# Regression tree.
# FIX: criterion='mse' was deprecated in scikit-learn 1.0 and removed in 1.2;
# 'squared_error' is the drop-in replacement (same split criterion).
reg_tree = DecisionTreeRegressor(criterion='squared_error',
                                 min_samples_split=20).fit(
    d.drop(['mpg', 'mpg_bin'], axis=1), d['mpg'])
# FIX: get_params is a method — the original printed the bound method object
# instead of the parameter dict.
print(reg_tree.get_params())
for i, f in enumerate(d.drop(['mpg', 'mpg_bin'], axis=1).columns):
    print('Importance of', f, reg_tree.feature_importances_[i])
# Recode the diagnosis label as a binary target: malignant ('M') -> 1,
# benign ('B') -> 0.
tumors.loc[tumors['diagnosis'] == 'M', 'diagnosis'] = 1
tumors.loc[tumors['diagnosis'] == 'B', 'diagnosis'] = 0

# Independent variables: the first 10 feature columns (drop the target itself).
tumors_X = tumors.iloc[:, :11].drop(['diagnosis'], axis=1).values
# Dependent variable. FIX: after the .loc assignments above the column dtype is
# still object; cast to int so the estimator receives a numeric {0, 1} target.
tumors_y = tumors['diagnosis'].astype(int)

# In[22]:
gam = LogisticGAM(n_splines=20).gridsearch(tumors_X, tumors_y)
gam.summary()

# In[23]:
print('La precisión del módelo es:',
      round(gam.accuracy(tumors_X, tumors_y) * 100, 2), "%")

# In[24]:
# Partial-dependence plot for each of the 10 features.
# NOTE(review): `titles` looks intended for per-axis titles but is never used
# in the loop — confirm whether ax.set_title(titles[i]) was meant.
titles = tumors.columns[1:11]
# FIX: the stray plt.figure() before plt.subplots() opened an extra empty
# figure window; subplots() creates its own figure.
fig, axs = plt.subplots(1, 10, figsize=(40, 8))
for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i)
    # Solid line: partial dependence of term i over its grid.
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    # Dashed blue line: 95% confidence band (second element of the
    # (pdep, confi) pair returned when width= is given).
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='b', ls='--')
    if i == 0:
        ax.set_ylim(-30, 30)