def fitness_function(self, individual, data):
        """Train the model selected by ``self.state.model`` with the
        hyperparameters encoded by ``individual`` and return its score.

        Parameters
        ----------
        individual : sequence of int
            GA gene values; gene ``i`` indexes into ``data[i]`` to pick a
            concrete hyperparameter value.
        data : sequence of sequences
            Candidate hyperparameter values, one row per gene.  The row
            layout depends on which model is selected.

        Returns
        -------
        float
            Accuracy of the trained model on the held-out test split.

        Raises
        ------
        ValueError
            If ``self.state.model`` is not one of the four supported names.
        """
        if self.state.model == 'Artificial Neural Network':
            print('Next Individual')
            # Genes: 0=hidden_layer_sizes, 1=alpha, 2=batch_size,
            # 3=learning_rate_init, 4=n_iter_no_change.
            model = MLPClassifier(
                hidden_layer_sizes=data[0][individual[0]],
                activation='relu',
                solver='adam',
                alpha=data[1][individual[1]],
                batch_size=data[2][individual[2]],
                learning_rate='constant',
                learning_rate_init=data[3][individual[3]],
                power_t=0.5,
                max_iter=30,
                shuffle=True,
                random_state=1,
                tol=0.0001,
                verbose=False,
                warm_start=False,
                momentum=0.9,
                nesterovs_momentum=True,
                early_stopping=False,
                validation_fraction=0.18,  # 0.33 0.18
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-08,
                n_iter_no_change=data[4][individual[4]],
                max_fun=15000)

            model.fit(self.X_train, self.y_train)
            prediction = model.predict(self.X_test)
            score = accuracy_score(self.y_test, prediction)

        elif self.state.model == "Support Vector Machine":
            print('Next Individual')
            # Genes: 0=C, 1=kernel, 2=degree, 3=gamma, 4=shrinking,
            # 5=probability, 6=decision_function_shape.
            model = sklearn.svm.SVC(
                C=data[0][individual[0]],
                kernel=data[1][individual[1]],
                degree=data[2][individual[2]],
                gamma=data[3][individual[3]],
                coef0=0.0,
                shrinking=data[4][individual[4]],
                probability=data[5][individual[5]],
                tol=0.001,
                cache_size=200,
                class_weight=None,
                verbose=True,
                max_iter=-1,
                decision_function_shape=data[6][individual[6]],
                break_ties=False,
                random_state=None)
            model.fit(self.X_train, self.y_train)
            prediction = model.predict(self.X_test)
            score = accuracy_score(self.y_test, prediction)

        elif self.state.model == 'Convolutional Neural Network':
            print('Next Individual')
            model = Sequential()
            # Four conv stages; each stage stacks two identical Conv1D
            # layers (filters gene, kernel-size gene) then batch-norm and
            # max-pooling.  Genes 0-3 pick filters, genes 4-7 kernel sizes.
            for filters_gene, kernel_gene in ((0, 4), (1, 5), (2, 6), (3, 7)):
                conv_kwargs = dict(
                    filters=data[filters_gene][individual[filters_gene]],
                    kernel_size=data[kernel_gene][individual[kernel_gene]],
                    activation='relu')
                if filters_gene == 0:
                    # First stage declares the input shape (the original
                    # passed it to both of its conv layers; Keras only
                    # honors it on the first layer).
                    conv_kwargs['input_shape'] = self.input_shape
                model.add(Conv1D(**conv_kwargs))
                model.add(Conv1D(**conv_kwargs))
                model.add(BatchNormalization())
                model.add(MaxPooling1D(pool_size=2))
            model.add(Dropout(0.25))
            model.add(Flatten())
            # Fully connected classifier head with a fixed 64/32/16 funnel.
            for units in (64, 32, 16):
                model.add(Dense(units, activation='relu'))
                model.add(BatchNormalization())
                model.add(Dropout(0.25))
            model.add(Dense(self.number_of_classes, activation='softmax'))

            model.compile(loss=self.loss,
                          optimizer=tensorflow.keras.optimizers.Adadelta(),
                          metrics=self.metrics)

            model.fit(self.X_train,
                      np.array(self.y_train),
                      batch_size=128,
                      epochs=25,
                      verbose=1,
                      validation_data=(self.xx_val, np.array(self.yy_val)))

            # evaluate() returns [loss, metric]; keep the metric.
            score = model.evaluate(self.xx_test,
                                   np.array(self.yy_test),
                                   verbose=1)[1]

        elif self.state.model == 'Recurrent Neural Network':
            print('Next Individual')
            # Single gene: 0 = LSTM hidden-unit count.
            model = Sequential()
            model.add(
                LSTM(data[0][individual[0]],
                     input_shape=self.input_shape,
                     return_sequences=False))
            model.add(Dropout(0.25))

            model.add(Dense(self.number_of_classes, activation='softmax'))

            model.compile(loss=self.loss,
                          optimizer='adam',
                          metrics=self.metrics)

            model.fit(self.X_train,
                      np.array(self.y_train),
                      batch_size=128,
                      epochs=20,
                      verbose=1,
                      validation_data=(self.xx_val, np.array(self.yy_val)))

            score = model.evaluate(self.xx_test,
                                   np.array(self.yy_test),
                                   verbose=1)[1]

        else:
            # Previously an unrecognized model name fell through all the
            # independent `if`s and hit UnboundLocalError on `score`;
            # fail loudly with a clear message instead.
            raise ValueError(
                'Unsupported model: {!r}'.format(self.state.model))

        return score
# Beispiel #2  (scraper artifact: snippet separator; the stray "0" below
# was part of the residue — both kept as comments so the file stays valid)
# 0
def make_network(FILENAME, sklearn=False, keras=False, normalize=True, spread=False, moneyline=False, tpot=False):
    """
    Build feature rows from a CSV of box scores and fit a classifier.

    Given the csv input of all the box scores, arrange it so the home and
    away teams are lined up, unnecessary columns are removed, and one-hot
    encoding is done, with optional normalization.

    Note that this data has already been doctored from its original form,
    taking out most unnecessary columns, but those could be useful later on.

    Parameters
    ----------
    FILENAME : str or file
        Path to the csv of the data.
    sklearn : bool
        If True, fit an sklearn MLPClassifier.
    keras : bool
        If True, fit a manually designed Keras MLP instead.
    normalize : bool
        If True, scale all inputs onto the same [0, 1] range.
    spread : bool
        If True, predict whether the spread was covered (3 classes).
    moneyline : bool
        If True, predict the outright winner (2 classes).
    tpot : bool
        If True, run a TPOT genetic search over classifiers instead.

    Returns
    -------
    model :
        The fitted model, so it can be applied to future data.
    scaler : MinMaxScaler or None
        The scaler fitted on the training data (to reuse on prediction
        data), or None when ``normalize`` is False.

    Raises
    ------
    ValueError
        If no target (``spread``/``moneyline``) or no model
        (``sklearn``/``keras``/``tpot``) flag is set.
    """
    from pandas import read_csv, get_dummies
    import numpy as np
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection.train_test_split is the replacement.
    from sklearn.model_selection import train_test_split
    from sklearn.neural_network import MLPClassifier
    # MinMaxScaler was used below but never imported (NameError when
    # normalize=True).
    from sklearn.preprocessing import MinMaxScaler

    # Derive shooting percentages and drop the raw made/attempted counts,
    # plus identifier columns with no predictive value.
    # *retrospectively dropping the raw counts may not make sense — could
    # be worth changing!
    data = read_csv(FILENAME)
    data['3P%'] = np.divide(data['3P'].values, data['3PA'].values)
    del data['3P'], data['3PA']
    data['FG%'] = np.divide(data['FG'].values, data['FGA'].values)
    del data['FG'], data['FGA']
    data['FT%'] = np.divide(data['FT'].values, data['FTA'].values)
    del data['Unnamed: 0'], data['GAME_ID'], data['Date'], data['Home'], data['Away'], data['PLUS_MINUS'], data['TOTAL']
    del data['FT'], data['FTA']
    data = get_dummies(data)

    # Pair consecutive rows (road stats, then home stats) into a single
    # feature row per game.  (The original also reshaped each row to
    # (1, n) and wrote it back into data.values — a broadcast no-op,
    # removed here.)
    dat = []
    for p in range(int(len(data.values) / 2)):
        fullboxgame = np.concatenate((data.values[2 * p], data.values[(2 * p) + 1]))
        dat.append(fullboxgame)
    dat = np.array(dat)  # array form enables the column slicing below

    openingspreadS = dat[:, 8]      # bookmaker's opening spread for the game
    roadpts = dat[:, 7]             # points scored by the road team
    homepts = dat[:, 52]            # points scored by the home team
    endspreadS = roadpts - homepts  # final margin (road - home)

    # Feature slices; column meanings per the doctored csv layout.
    x1 = dat[:, 0:7]    # road offensive rebounds to blocks
    x2 = dat[:, 9:42]   # road 3p% to team name (hot encoded)
    x3 = dat[:, 45:52]  # home offensive rebounds to blocks
    x4 = dat[:, 54:87]  # home 3p% to hot encoded team name
    x5 = dat[:, 8]      # opening spread is itself a useful feature
    X1 = np.concatenate((x1, x2), axis=1)
    X2 = np.concatenate((x3, x4), axis=1)
    X3 = np.concatenate((X1, X2), axis=1)

    if not (spread or moneyline):
        # Previously fell through to a NameError on X at scaler.fit(X).
        raise ValueError(
            'No prediction target: set `spread` or `moneyline`.')

    y = []

    if spread:
        # Include the initial spread of the game as a feature.
        X = np.column_stack((X3, x5))
        for j in range(len(endspreadS)):
            openspread = openingspreadS[j]
            endspread = endspreadS[j]
            if openspread + endspread < 0:
                y.append(np.array([0, 1, 0]))  # home team covered
            elif openspread + endspread > 0:
                y.append(np.array([1, 0, 0]))  # road covered
            else:
                y.append(np.array([0, 0, 1]))  # push!

    if moneyline:
        # Spread is still a useful feature here: it implies the favorite.
        X = np.column_stack((X3, x5))
        for j in range(len(endspreadS)):
            if endspreadS[j] < 0:
                y.append(np.array([0, 1]))  # home team had more points
            else:
                y.append(np.array([1, 0]))  # road team victory

    y = np.array(y)

    # None when normalize=False (was an UnboundLocalError at the return).
    scaler = None
    if normalize:
        scaler = MinMaxScaler()  # default copy=True, feature_range=(0, 1)
        scaler.fit(X)
        X = scaler.transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.27)

    model = None
    if sklearn:
        model = MLPClassifier()
        model.shuffle = True
        model.batch_size = 25
        # NOTE(review): attributes like n_layers_ can't be set to shape the
        # network; pass hidden_layer_sizes to the constructor (or use the
        # keras path) to control the architecture.
        model.fit(X_train, y_train)
        print(model.score(X_test, y_test))

    if keras:
        print("keras NN goes here")
        model = Sequential()
        model.add(Dense(80, input_dim=np.shape(X)[1], activation='relu'))
        model.add(Dense(81, activation='relu'))
        model.add(Dense(31, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Different output widths for the two problems.
        if spread:
            model.add(Dense(3, activation='softmax'))
        if moneyline:
            model.add(Dense(2, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(X_train, y_train, batch_size=40, epochs=20, validation_split=.2)
        scores = model.evaluate(X_test, y_test)
        print(scores[1])

    if tpot:
        # Collapse the one-hot labels to binary: road-covered vs. the rest.
        # NOTE(review): assumes 3-class (spread) labels — confirm before
        # using with moneyline targets.
        y2 = [0 if sum(label == np.array([1, 0, 0])) == 3 else 1
              for label in y_train]

        from tpot import TPOTClassifier
        # Experimental: genetic programming to identify an optimal
        # classification pipeline.  (Local renamed from `tpot` to avoid
        # shadowing the parameter.)
        tpot_search = TPOTClassifier(generations=5, population_size=50, verbosity=2, n_jobs=-1)
        tpot_search.fit(X_train, y2)
        model = tpot_search  # the returned model

    if model is None:
        # Previously an UnboundLocalError at the return statement.
        raise ValueError(
            'No model selected: set `sklearn`, `keras`, or `tpot`.')

    return model, scaler