Example #1
# Assumed imports for this snippet
from pygam import LogisticGAM
from sklearn import metrics

def logistic_GAM(x_tr, y_tr, x_tst, y_tst):
    # Fit a logistic GAM and report the confusion matrix plus train/test accuracy
    classifier = LogisticGAM()
    classifier.fit(x_tr, y_tr)
    tr_pred = classifier.predict(x_tr)
    y_pred = classifier.predict(x_tst)
    confusion_matrix = metrics.confusion_matrix(y_tst, y_pred)
    print(confusion_matrix)
    print('Accuracy of the logistic GAM classifier on the test set: {:.2f}'
          .format(metrics.accuracy_score(y_tst, y_pred)))
    print('Accuracy of the logistic GAM classifier on the train set: {:.2f}'
          .format(metrics.accuracy_score(y_tr, tr_pred)))
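
# A minimal usage sketch with synthetic data (make_classification and the
# split below are illustrative, not part of the original example):
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=5, random_state=0)
x_tr, x_tst, y_tr, y_tst = train_test_split(X, y, test_size=0.3, random_state=0)
logistic_GAM(x_tr, y_tr, x_tst, y_tst)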
Example #2
# Assumed imports for this snippet
import pandas as pd
from pygam import LogisticGAM
from sklearn.base import BaseEstimator, RegressorMixin


class AdaptiveLogisticGAM(BaseEstimator, RegressorMixin):
    def __init__(self, param_grid=None, gam_params=None):
        # create GAM
        if gam_params is None:
            gam_params = {}
        self.model = LogisticGAM(**gam_params)

        # set grid search parameters
        if param_grid is None:
            param_grid = GAM_GRID_BASE  # assumed defined elsewhere in the project
        self.param_grid = param_grid

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.values

        # fit using grid search over self.param_grid
        self.model.gridsearch(X, y, progress=False, **self.param_grid)
        return self  # scikit-learn's fit contract expects the estimator back

    def predict(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict(X)

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict_proba(X)
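
# A minimal usage sketch with hypothetical data. GAM_GRID_BASE is assumed to
# be a dict of pygam gridsearch grids; the values below are illustrative.
import numpy as np

GAM_GRID_BASE = {'lam': np.logspace(-3, 3, 5)}

rng = np.random.default_rng(0)
X_demo = pd.DataFrame(rng.normal(size=(200, 3)), columns=['a', 'b', 'c'])
y_demo = (X_demo['a'] + 0.5 * rng.normal(size=200) > 0).astype(int).to_numpy()

est = AdaptiveLogisticGAM()
est.fit(X_demo, y_demo)
print(est.predict_proba(X_demo)[:5])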
Example #3
# Assumed imports for this snippet
import numpy as np
import matplotlib.pyplot as plt
from pygam import LogisticGAM

def simulation(No_T, n, p, box_plot=True):
    err = []
    for i in range(No_T):
        # generate train and test data (generate_data is not shown in the
        # source; a hypothetical version is sketched after this function)
        X_train, Y_train = generate_data(n, p)
        X_test, Y_test = generate_data(n, p)

        logit_gam = LogisticGAM()
        logit_gam.gridsearch(X_train, Y_train)

        # calculate the test error rate
        test_err = sum(logit_gam.predict(X_test) != Y_test) / n
        err.append(test_err)
    if box_plot:
        plt.figure(figsize=(8, 6), dpi=80)
        plt.boxplot(err)
        plt.text(1.1, 0.15, "Mean: {:.2f}".format(np.mean(err)))
        plt.text(1.1, 0.14, "Var: {:.3f}".format(np.var(err)))
        plt.title("LogisticGAM")
        plt.ylabel("Test Error")
        plt.show()
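
# A hypothetical generate_data, for illustration only (the source does not
# include it): n samples of p Gaussian features with a logistic response.
def generate_data(n, p):
    beta = np.linspace(-1, 1, p)  # fixed coefficients shared by train and test
    X = np.random.normal(size=(n, p))
    prob = 1 / (1 + np.exp(-(X @ beta)))
    Y = (np.random.rand(n) < prob).astype(int)
    return X, Y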
Example #4
# `aa` (the GAM terms), `lams`, `trainX`, `trainy`, the sample weights `w`,
# `testX`, `testy`, and the feature names `names1` are assumed defined
# earlier; `lams` is assumed to hold random draws in [0, 1).
import numpy as np
import matplotlib.pyplot as plt
from pygam import LogisticGAM

n_splines = [5, 10, 15, 20, 25]
lams = lams * 6 - 3  # rescale the draws from [0, 1) to [-3, 3]
lams = np.exp(lams)  # exponentiate to get a log-spaced lam grid
cons = [
    'convex', 'concave', 'monotonic_inc', 'monotonic_dec', 'circular', 'none'
]

# Two-stage grid search: first over lam and n_splines, then over constraints
random = LogisticGAM(aa).gridsearch(trainX,
                                    trainy,
                                    weights=w,
                                    lam=lams,
                                    n_splines=n_splines)
random = random.gridsearch(trainX, trainy, constraints=cons)
print(random.lam)
print(random.n_splines)
print(random.constraints)
print(random.accuracy(testX, testy))

from sklearn.metrics import confusion_matrix
preds = random.predict(testX)
print(confusion_matrix(testy, preds))
for i, term in enumerate(random.terms):
    if term.isintercept:
        continue
    XX = random.generate_X_grid(term=i)
    pdep, confi = random.partial_dependence(term=i, X=XX, width=0.95)
    plt.figure()
    plt.plot(XX[:, term.feature], pdep)
    plt.plot(XX[:, term.feature], confi, c='r', ls='--')
    plt.title(names1[i])
    plt.show()
Example #5
# Project-specific helpers (top_15_predictions, all_nba_test_report,
# players_missed, predict_2020) and the fitted gam_model, encoder, and
# data frames are assumed defined elsewhere in the project.
gam_pred_prob = gam_model.predict_proba(X_test)

gam_preds, complete_gam_dat = top_15_predictions(entire_test_data, gam_pred_prob)

gam_performance = all_nba_test_report(complete_gam_dat)

players_missed(complete_gam_dat)

gam_predict_probs_2020 = gam_model.predict_proba(features_2020)
gam_predict_binary_2020 = gam_model.predict(features_2020)

gam_predictions_2020 = predict_2020(positions=position_encoder.inverse_transform(features_2020['All_NBA_Pos']),
                                    player_names=current_dat['Player'],
                                    binary_prediction=gam_predict_binary_2020,
                                    probability_predictions=gam_predict_probs_2020)

gam_predictions_2020.to_csv("gam_predictions.csv")


##### ----- ##### ----- ##### ----- ##### ----- ##### ----- ##### ----- ##### ----- ##### ----- #####
# Model 1.4 - KNN
# --> Standardize all continuous features

# NOTE: We standardize the train set. To standardize the test set, we reuse the
# mean and sd obtained from the train set, so no information leaks from the
# test set; a minimal sketch follows.
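
# A minimal sketch of that train-statistics-only standardization using
# scikit-learn's StandardScaler (X_train/X_test are illustrative names,
# not from the source):
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # mean/sd learned from the train set only
X_test_std = scaler.transform(X_test)        # same train statistics applied to the test set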
Example #6
# Generalizing a GAM

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from pygam import LogisticGAM  # needed below; X and y are assumed defined earlier

# We can split the data just like we usually would:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)
gam4 = LogisticGAM().gridsearch(X_train, y_train)

predictions = gam4.predict(X_test)
print("Accuracy: {} ".format(accuracy_score(y_test, predictions)))
probas = gam4.predict_proba(X_test)
print("Log Loss: {} ".format(log_loss(y_test, probas)))

# Accuracy: 0.925531914893617
# Log Loss: 0.15704862623168236

lambda_ = np.logspace(-3, 3, 3)
n_splines = [2, 5, 10, 20, 50]
constraints = [None, 'monotonic_inc', 'monotonic_dec']

# Available constraint options: ['convex', 'concave', 'monotonic_inc', 'monotonic_dec', 'circular', 'none']

# The source snippet is truncated here; the grids defined above make the
# intended call clear:
gam5 = LogisticGAM().gridsearch(
    X_train,
    y_train,
    lam=lambda_,
    n_splines=n_splines,
    constraints=constraints)
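
# Note: pygam's gridsearch fits one model per combination in the Cartesian
# product of the supplied grids (3 lam values x 5 n_splines x 3 constraints
# = 45 candidates here) and keeps the model with the best objective
# (GCV/UBRE by default).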
Example #7
# Assumed imports for this snippet (standalone Keras layer names)
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from pygam import LogisticGAM


class EpidemicModels:

    # Sequential 6 layer neural network
    def returnSequential6(self):
        model = Sequential()
        model.add(Dense(50, input_dim=20, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def returnSequential9(self):
        model = Sequential()
        model.add(Dense(80, input_dim=20, activation='relu'))
        model.add(Dense(70, activation='relu'))
        model.add(Dense(60, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))  # sigmoid output to match binary_crossentropy
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def RNN(self):
        model = Sequential()
        model.add(SimpleRNN(2, input_shape=(1, 20)))  # train() reshapes X to (n, 1, 20)
        model.add(Dense(1, activation='sigmoid'))  # sigmoid output to match binary_crossentropy
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def multi_RNN(self):
        model = Sequential()
        model.add(SimpleRNN(2, input_shape=(1, 20)))  # train() reshapes X to (n, 1, 20)
        model.add(Dense(40, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))  # sigmoid output to match binary_crossentropy
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def baseline(self):
        # Create model
        model = Sequential()
        model.add(Dense(20, input_dim=20, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def lstm(self):
        model = Sequential()
        model.add(LSTM(10, input_shape=(1, 20)))  # train() reshapes X to (n, 1, 20)
        model.add(Dense(1, activation='linear'))
        model.compile(loss='mean_absolute_error', optimizer='adam')
        return model

    def multi_lstm(self):
        model = Sequential()
        model.add(LSTM(4, input_shape=(1, 20), return_sequences=True))
        model.add(LSTM(4))  # input shape inferred from the previous layer
        model.add(Dense(1, activation='sigmoid'))  # sigmoid output to match binary_crossentropy
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    # Sequential 3 layer neural network
    def returnSequential2(self):
        model = Sequential()
        model.add(Dense(14, activation='relu', input_dim=20))
        model.add(Dense(units=7, activation='relu'))
        model.add(Dense(units=1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def __init__(self, m=1):
        if m == 0:
            self.model = self.baseline()
            self.type = 0
        elif m == 1:
            self.model = self.returnSequential2()
            self.type = 2
        elif m == 2:
            self.model = self.returnSequential6()
            self.type = 2
        elif m == 3:
            self.model = self.RNN()
            self.type = 1
        elif m == 4:
            self.model = self.multi_RNN()
            self.type = 1
        elif m == 5:
            self.model = self.lstm()
            self.type = 1
        elif m == 6:
            self.model = self.multi_lstm()
            self.type = 1
        elif m == 7:
            self.model = LogisticGAM()
            self.type = 3
        elif m == 8:
            self.model = self.returnSequential9()
            self.type = 2

    def returnModel(self):
        return self.model

    def train(self, X, y, bs=10, epochs=100):
        if self.type == 1:
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        if self.type == 3:
            self.model.gridsearch(X, y)
        else:
            self.model.fit(X, y, batch_size=bs, epochs=epochs, shuffle=True)

    def prediction(self, X):
        if self.type == 1:
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        return self.model.predict(X)

    def cross_eval(self, X, y, bs=10, ep=100, k=5):
        scores = []
        if self.type == 0:
            kf = KFold(n_splits=k, shuffle=True, random_state=0)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.fit(X_train, y_train, batch_size=bs, epochs=ep, verbose=0)
                a, score = self.model.evaluate(X_test, y_test, verbose=0)
                scores.append(score)
            return sum(scores) / len(scores)

        elif self.type == 1:
            kf = KFold(n_splits=k, shuffle=False)  # random_state is only valid with shuffle=True
            scores = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
                X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
                self.model.fit(X_train, y_train, batch_size=bs, epochs=ep, verbose=0)
                score = self.model.evaluate(X_test, y_test, verbose=0)
                if isinstance(score, list):  # [loss, metric] when metrics were compiled in
                    score = score[-1]
                scores.append(score)
            return sum(scores) / len(scores)

        elif self.type == 2:
            kf = KFold(n_splits=k, shuffle=True, random_state=0)
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.fit(X_train, y_train, batch_size=bs, epochs=ep, verbose=0)
                a, score = self.model.evaluate(X_test, y_test, verbose=0)
                print(score)
                scores.append(score)
            return sum(scores) / len(scores)

        elif self.type == 3:
            kf = KFold(n_splits=k, shuffle=False)  # random_state is only valid with shuffle=True
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                self.model.gridsearch(X_train, y_train)
                y_pre = self.model.predict(X_test)
                print(y_pre)
                scores.append(f1_score(y_test, y_pre))  # f1_score expects (y_true, y_pred)
            return sum(scores) / len(scores)
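
# A minimal usage sketch with hypothetical data (shapes follow the
# input_dim=20 assumption baked into the architectures above):
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 20)).astype('float32')
    y = rng.integers(0, 2, size=200).astype('float32')

    em = EpidemicModels(m=0)  # baseline dense network
    em.train(X, y, bs=10, epochs=5)
    print(em.prediction(X[:5]).ravel())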
Example #8
# Assumed imports for this snippet; `dates` and the `called_pitches` frame
# are assumed defined earlier.
import datetime as dt
import pandas as pd
from pygam import LogisticGAM

for i in dates:
    # Fit the model to the 90 days of data up to and including date i
    mask = (called_pitches['game_date'] <= i) & (called_pitches['game_date'] >=
                                                 (i - dt.timedelta(days=90)))
    window = called_pitches.loc[mask]  # renamed from `set`, which shadows the builtin
    df = pd.DataFrame(window)
    target_df = pd.Series(window.strikeCall)
    X = df[['plate_x', 'plate_z', 'sz_top', 'sz_bot']]  # 'bsCount'
    y = target_df
    gam_fit = LogisticGAM().fit(X, y)

    # Predict each day of games using the 90-day model
    games = called_pitches.loc[called_pitches['game_date'] == i]
    x_games = games[['plate_x', 'plate_z', 'sz_top', 'sz_bot']]
    results = gam_fit.predict(x_games)
    # .loc (not .at) is required for boolean-mask assignment
    called_pitches.loc[called_pitches['game_date'] == i,
                       'pred_zone_last90'] = results

    # Track progress and reset the model
    print("Inserted..." + str(i))
    gam_fit = None

called_pitches['zone_dev_last90'] = (called_pitches['strikeCall'] -
                                     called_pitches['pred_zone_last90'])

# Create a unique umpire-date ID; the per-umpire-game average zone deviation
# and called-pitch counts are sketched below
called_pitches['umpire_date'] = called_pitches['umpire'] + called_pitches[
    'game_date'].map(str)
umpDev90 = pd.DataFrame(
    called_pitches[['game_date', 'umpire_date', 'umpire', 'game_pk']])
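
# A hypothetical continuation of the aggregation described above (not from
# the source): average zone deviation and called-pitch counts per umpire-game.
ump_game_stats = (called_pitches
                  .groupby('umpire_date')
                  .agg(mean_zone_dev=('zone_dev_last90', 'mean'),
                       n_called_pitches=('zone_dev_last90', 'size')))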