Ejemplo n.º 1
0
class AdaptiveLogisticGAM(BaseEstimator, RegressorMixin):
    """scikit-learn style wrapper that tunes a pyGAM ``LogisticGAM`` by grid search.

    Parameters
    ----------
    param_grid : dict, optional
        Keyword arguments forwarded to ``LogisticGAM.gridsearch``. Falls back
        to the module-level ``GAM_GRID_BASE`` when ``None``.
    gam_params : dict, optional
        Keyword arguments forwarded to the ``LogisticGAM`` constructor.

    NOTE(review): the class derives from ``RegressorMixin`` although it wraps
    a classifier; the base classes are left untouched so existing callers
    (e.g. anything relying on the inherited ``score``) keep working.
    """

    def __init__(self, param_grid=None, gam_params=None):
        # Keep the constructor argument as passed so that
        # BaseEstimator.get_params() / clone() can read it back.
        self.gam_params = gam_params

        # create GAM
        self.model = LogisticGAM(**({} if gam_params is None else gam_params))

        # set grid search parameters
        if param_grid is None:
            param_grid = GAM_GRID_BASE
        self.param_grid = param_grid

    @staticmethod
    def _as_array(X):
        """Return the underlying ndarray when X is a DataFrame, else X itself."""
        if isinstance(X, pd.DataFrame):
            return X.values
        return X

    def fit(self, X, y):
        """Run the grid search on (X, y) and return ``self`` for chaining."""
        # fit using grid-search
        self.model.gridsearch(
            self._as_array(X), y, progress=False, **self.param_grid
        )
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict`` output for X."""
        return self.model.predict(self._as_array(X))

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba`` output for X."""
        return self.model.predict_proba(self._as_array(X))
Ejemplo n.º 2
0
    def GAM2(self):
        """Fit a LogisticGAM with one spline term per feature and score it.

        The smoothing penalty ``lam`` is tuned by grid search over
        ``np.logspace(-3, 3, 11)`` on the training data; no variable
        selection is performed (all eight spline terms are always used).
        A model summary is printed and, when ``self.plot`` is truthy, the
        predictions are plotted against the test targets.

        Returns
        -------
        The mean squared error between ``self.ytest`` and the predictions
        (the original computed this value and discarded it).
        """
        from pygam import LogisticGAM, s
        terms = s(0) + s(1) + s(2) + s(3) + s(4) + s(5) + s(6) + s(7)

        gam = LogisticGAM(terms=terms, fit_intercept=False)
        # Generate the model
        mod = gam.gridsearch(self.Xtrain.values, self.ytrain,
                             lam=np.logspace(-3, 3, 11))
        mod.summary()  # Pseudo-R2: 0.6449
        ypred = mod.predict(self.Xtest)
        # Column vector so the subtraction lines up with self.ytest row by row.
        ypred_col = ypred.reshape(-1, 1)
        # `.values` presumes self.ytest is a pandas object — TODO confirm.
        MSE1 = np.mean((self.ytest - ypred_col)**2).values

        if self.plot:
            # Predictions are shifted down by 0.5 purely so the two point
            # clouds do not overlap in the figure.
            plt.plot(range(len(ypred_col)),
                     ypred_col - 0.5, "r.", label='GAM model')
            plt.plot(range(len(self.ytest)),
                     self.ytest,
                     "b.",
                     label='Testing Data')
            plt.legend()
            # NOTE(review): the title says "linear terms" but the model above
            # is built from spline terms only; text left unchanged.
            plt.title("GAM model with linear terms. Prediction data is\n"
                      + "scaled downwards by 0.5 for visual purposes.")
            plt.ylabel("FFVC score")
            plt.xlabel("Sample no.")
            plt.show()

        return MSE1
Ejemplo n.º 3
0
def simulation(No_T,n,p,box_plot=True):
    """Repeat a train/test experiment with LogisticGAM and collect test errors.

    Parameters
    ----------
    No_T : number of independent trials to run.
    n, p : forwarded to ``generate_data(n, p)`` for both the training and
        the test set; ``n`` is also used as the denominator of the
        misclassification rate.
    box_plot : when true, show a box plot of the errors annotated with their
        mean and variance.

    Returns
    -------
    list
        One misclassification rate per trial (the original built this list
        but never returned it, so nothing was available to the caller when
        ``box_plot`` was false).
    """
    err = []
    for _ in range(No_T):
        # generate independent training and test data
        X_train, Y_train = generate_data(n, p)
        X_test, Y_test = generate_data(n, p)

        logit_gam = LogisticGAM()
        logit_gam.gridsearch(X_train, Y_train)

        # calculate test error: fraction of misclassified test samples
        test_err = sum(logit_gam.predict(X_test) != Y_test) / n
        err.append(test_err)

    if box_plot:
        plt.figure(num=None, figsize=(8, 6), dpi=80)
        plt.boxplot(err)
        plt.text(1.1, 0.15, "Mean:{:.2f}".format(np.mean(err)))
        plt.text(1.1, 0.14, "Var:{:.3f}".format(np.var(err)))
        plt.title("logisticGAM")
        plt.ylabel("Test Error")
        plt.show()

    return err
Ejemplo n.º 4
0
# Weighted fit of a previously constructed model (gam1, trainX, trainy and w
# are defined outside this excerpt).
gam1 = gam1.fit(trainX, trainy, weights=w)
import numpy as np
# Random search over the smoothing penalty: 10 candidate rows of 33 values.
lams = np.random.rand(10, 33)  # uniform samples on [0, 1), shape (10, 33)
n_splines = [5, 10, 15, 20, 25]
lams = lams * 6  # stretch to [0, 6)
lams = lams - 3  # shift to [-3, 3)
lams = np.exp(lams)  # exponentiate, giving values in [e**-3, e**3)
cons = [
    'convex', 'concave', 'monotonic_inc', 'monotonic_dec', 'circular', 'none'
]
# NOTE(review): `aa` is not defined in this excerpt, and the name `random`
# shadows the standard-library module of the same name.
random = LogisticGAM(aa).gridsearch(trainX,
                                    trainy,
                                    weights=w,
                                    lam=lams,
                                    n_splines=n_splines)
# Second search, this time over the constraint candidates only; it is run
# without the sample weights used above.
random = random.gridsearch(trainX, trainy, constraints=cons)
# Report the selected hyper-parameters and the hold-out accuracy.
print(random.lam)
print(random.n_splines)
print(random.constraints)
print(random.accuracy(testX, testy))

from sklearn.metrics import confusion_matrix
preds = random.predict(testX)
print(confusion_matrix(testy, preds))
# One figure per non-intercept term showing its partial dependence.
for i, term in enumerate(random.terms):
    if term.isintercept:
        continue
    XX = random.generate_X_grid(term=i)
    # `confi` (the width=0.95 interval) is computed but not plotted here.
    pdep, confi = random.partial_dependence(term=i, X=XX, width=0.95)
    plt.figure()
    plt.plot(XX[:, term.feature], pdep)
Ejemplo n.º 5
0
                                          meshgrid=True,
                                          width=.95)
    ax.plot(XX[0], pdep)
    ax.plot(XX[0], confi[:, 0], c='grey', ls='--')
    ax.plot(XX[0], confi[:, 1], c='grey', ls='--')
    ax.set_title(selected_features[i])

plt.show()

#-----------------------------------------------------
# Grid search with pyGAM

#default in pyGAM grid search is lambda space of {'lam':np.logspace(-3,3,11)}

# Fit a LogisticGAM with the default grid search (X and y are defined
# outside this excerpt).
gam3 = LogisticGAM()
gam3.gridsearch(X, y)

gam3.summary()
# The two expressions below are bare statements: their values are only shown
# in an interactive session and are discarded when run as a script. Both are
# evaluated on the same X, y that were used for fitting. The trailing numbers
# are the values recorded by the original author.
roc_auc_score(y, gam3.predict_proba(X))  #0.9936710533269911
gam3.accuracy(X, y)  #0.9560632688927944

#-----------------------------------------------------
# Generalizing a GAM

import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

# We can split the data just like we usually would:
Ejemplo n.º 6
0
class EpidemicModels:
    """Selectable binary model (several Keras networks or a pyGAM LogisticGAM).

    ``m`` picks the model built in ``__init__``:

    ====  =====================  ========
    m     builder                 type
    ====  =====================  ========
    0     ``baseline``            0
    1     ``returnSequential2``   2
    2     ``returnSequential6``   2
    3     ``RNN``                 1
    4     ``multi_RNN``           1
    5     ``lstm``                1
    6     ``multi_lstm``          1
    7     ``LogisticGAM()``       3
    8     ``returnSequential9``   2
    ====  =====================  ========

    ``self.type`` controls how data is fed: type 1 models get their input
    reshaped to ``(samples, 1, features)``, type 3 is trained with
    ``gridsearch`` instead of ``fit``, and types 0 and 2 are plain dense
    networks. Every network is declared with ``input_dim=20``.
    """

    def __init__(self, m=1):
        """Build the model selected by ``m``.

        Raises
        ------
        ValueError
            If ``m`` is not one of 0..8. Previously such a value left the
            instance without ``model``/``type`` and failed later with an
            unrelated ``AttributeError``.
        """
        if m == 0:
            self.model = self.baseline()
            self.type = 0
        elif m == 1:
            self.model = self.returnSequential2()
            self.type = 2
        elif m == 2:
            self.model = self.returnSequential6()
            self.type = 2
        elif m == 3:
            self.model = self.RNN()
            self.type = 1
        elif m == 4:
            self.model = self.multi_RNN()
            self.type = 1
        elif m == 5:
            self.model = self.lstm()
            self.type = 1
        elif m == 6:
            self.model = self.multi_lstm()
            self.type = 1
        elif m == 7:
            self.model = LogisticGAM()
            self.type = 3
        elif m == 8:
            self.model = self.returnSequential9()
            self.type = 2
        else:
            raise ValueError(
                "unknown model id m={!r}; expected an integer in 0..8".format(m)
            )

    # Sequential 6 layer neural network
    def returnSequential6(self):
        """Dense 50-40-30-20-10 ReLU stack with a sigmoid output unit."""
        model = Sequential()
        model.add(Dense(50, input_dim=20, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def returnSequential9(self):
        """Dense 80-70-...-10 ReLU stack with a single linear output unit."""
        model = Sequential()
        model.add(Dense(80, input_dim=20, activation='relu'))
        model.add(Dense(70, activation='relu'))
        model.add(Dense(60, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        # NOTE(review): linear output combined with binary_crossentropy —
        # confirm this is intended rather than a sigmoid output.
        model.add(Dense(1, activation='linear'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def RNN(self):
        """SimpleRNN with 2 units followed by a linear output unit."""
        model = Sequential()
        model.add(SimpleRNN(2, input_dim=20))
        # NOTE(review): linear output with binary_crossentropy — confirm.
        model.add(Dense(1, activation='linear'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def multi_RNN(self):
        """SimpleRNN(2) followed by Dense 40 and 20 ReLU layers, linear output."""
        model = Sequential()
        model.add(SimpleRNN(2, input_dim=20))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(20, activation='relu'))
        # NOTE(review): linear output with binary_crossentropy — confirm.
        model.add(Dense(1, activation='linear'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def baseline(self):
        """Single hidden Dense(20, ReLU) layer with a sigmoid output unit."""
        # Create model
        model = Sequential()
        model.add(Dense(20, input_dim=20, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def lstm(self):
        """LSTM(10) with a linear output, compiled with MAE loss and no metrics."""
        model = Sequential()
        model.add(LSTM(10, input_dim=20))
        model.add(Dense(1, activation='linear'))
        model.compile(loss='mean_absolute_error', optimizer='adam')
        return model

    def multi_lstm(self):
        """Two stacked LSTM(4) layers with a linear output unit."""
        model = Sequential()
        model.add(LSTM(4, input_dim=20, return_sequences=True))
        model.add(LSTM(4, input_dim=20))
        # NOTE(review): linear output with binary_crossentropy — confirm.
        model.add(Dense(1, activation='linear'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    # Dense 14-7 ReLU network with a sigmoid output
    def returnSequential2(self):
        """Dense 14-7 ReLU stack with a sigmoid output unit."""
        model = Sequential()
        model.add(Dense(14, activation='relu', input_dim=20))
        model.add(Dense(units=7, activation='relu'))
        model.add(Dense(units=1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def returnModel(self):
        """Return the wrapped model object."""
        return self.model

    @staticmethod
    def _as_sequence(X):
        """Reshape a 2-D (samples, features) array to (samples, 1, features)."""
        return np.reshape(X, (X.shape[0], 1, X.shape[1]))

    def train(self, X, y, bs=10, epochs=100):
        """Train the wrapped model on (X, y).

        Type 3 (LogisticGAM) is tuned with ``gridsearch``; every other type
        is trained with ``fit`` using batch size ``bs`` for ``epochs`` epochs.
        Type 1 input is reshaped to a length-1 sequence first.
        """
        if self.type == 1:
            X = self._as_sequence(X)
        if self.type == 3:
            self.model.gridsearch(X, y)
        else:
            self.model.fit(X, y, batch_size=bs, epochs=epochs, shuffle=True)

    def prediction(self, X):
        """Return ``self.model.predict`` on X (reshaped first for type 1)."""
        if self.type == 1:
            X = self._as_sequence(X)
        return self.model.predict(X)

    def cross_eval(self, X, y, bs=10, ep=100, k=5):
        """Return the mean per-fold score of a ``k``-fold cross validation.

        Types 0 and 2 use shuffled folds (``random_state=0``); types 1 and 3
        use contiguous, unshuffled folds. The score is the last value
        returned by ``model.evaluate`` for the network types (or the single
        value when the model was compiled without metrics) and the F1 score
        for type 3. Returns ``None`` when ``self.type`` is none of 0..3.

        NOTE(review): the same model instance is trained fold after fold
        without being re-initialised, so later folds start from weights that
        have already seen their test samples. Left as in the original.
        """
        if self.type in (0, 2):
            kf = KFold(n_splits=k, shuffle=True, random_state=0)
        elif self.type in (1, 3):
            # random_state must not be given together with shuffle=False:
            # current scikit-learn rejects that combination with ValueError,
            # and it never had any effect on unshuffled folds.
            kf = KFold(n_splits=k, shuffle=False)
        else:
            return None

        scores = []
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            if self.type == 3:
                self.model.gridsearch(X_train, y_train)
                y_pre = self.model.predict(X_test)
                print(y_pre)
                # Conventional (y_true, y_pred) order.
                score = f1_score(y_test, y_pre)
            else:
                if self.type == 1:
                    X_train = self._as_sequence(X_train)
                    X_test = self._as_sequence(X_test)
                self.model.fit(X_train, y_train, batch_size=bs, epochs=ep, verbose=0)
                result = self.model.evaluate(X_test, y_test, verbose=0)
                # evaluate() yields [loss, metric] when metrics were compiled
                # in and a bare loss otherwise; appending the raw list made
                # the final averaging fail for the recurrent models.
                if isinstance(result, (list, tuple)):
                    score = result[-1]
                else:
                    score = result
                if self.type == 2:
                    print(score)

            scores.append(score)

        return sum(scores) / len(scores)