Example No. 1
def neural_network_hyper(features_encoded, target_encoded):
    """ Hyperparameter tuning of Neural network
    :param: Features data
    :param: Target data
    :return: Neural network model with best parameters 
    """
    global hyperparameter
    if not hyperparameter:
        estimator = KerasClassifier(build_fn=create_model,
                                    n_hidden=5,
                                    size_nodo=12,
                                    ativ="relu",
                                    opt="adam",
                                    dropout=0.1,
                                    epochs=500,
                                    batch_size=10000,
                                    validation_split=0.1,
                                    verbose=0)
        return estimator
    grid_param = {
        'n_hidden': [2, 5],
        'size_nodo': [50, 200],
        'ativ': ['relu', 'softmax'],
        'opt': ['adam'],
        'dropout': [0.1],
        'epochs': [50],
        'batch_size': [20000]
    }
    model = KerasClassifier(build_fn=create_model,
                            verbose=1,
                            validation_split=0.1)
    ann_hyper_parameters = grid_search(features_encoded, target_encoded, 2,
                                       grid_param, model)
    print('\n\n\nBest Neural Network Hyper-parameters using GridSearch:\n',
          ann_hyper_parameters)

    estimator = KerasClassifier(
        build_fn=create_model,
        n_hidden=ann_hyper_parameters['n_hidden'],
        size_nodo=ann_hyper_parameters['size_nodo'],
        ativ=ann_hyper_parameters['ativ'],
        opt=ann_hyper_parameters['opt'],
        dropout=ann_hyper_parameters['dropout'],
        epochs=1000,  #ann_hyper_parameters['epochs'],
        batch_size=ann_hyper_parameters['batch_size'],
        validation_split=0.1,
        verbose=1)
    return estimator
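Every example on this page wires an external factory into KerasClassifier via build_fn, but the factories themselves are not shown. A minimal sketch of what Example No. 1's create_model might look like, assuming a binary target; only the argument names are fixed by the calls above, the architecture itself is a guess:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def create_model(n_hidden=2, size_nodo=50, ativ='relu', opt='adam', dropout=0.1):
    # hypothetical factory; argument names match the keys tuned above
    model = Sequential()
    for _ in range(n_hidden):
        model.add(Dense(size_nodo, activation=ativ))
        model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))  # assuming a binary target
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=['accuracy'])
    return model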
Example No. 2
def Keras_Classifier(n_splits, save_model_address, model_types, train_images,
                     train_labels, test_images, test_labels, image_size1,
                     image_size2, image_size3, label_types, epochs, times, L1,
                     L2, F1, F2, F3):
    print("begin of keras_classifier: ")
    model = KerasClassifier(build_fn=create_model,
                            model_types=model_types,
                            image_size1=image_size1,
                            image_size2=image_size2,
                            label_types=label_types,
                            image_size3=image_size3,
                            times=times,
                            L1=L1,
                            L2=L2,
                            F1=F1,
                            F2=F2,
                            F3=F3,
                            epochs=epochs,
                            batch_size=2,
                            verbose=1)
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=5000)
    #scores = cross_val_score(model1, train_images, train_labels, cv=kfold)
    print("before of cross_val_predict:")
    y_pre = cross_val_predict(model, train_images, train_labels, cv=kfold)
    print(y_pre)
    y_scores = y_pre
    print("train_labels", train_labels)
    print("y_scores", y_scores)
    fpr, tpr, thresholds = roc_curve(train_labels, y_scores)
    plt.plot(fpr, tpr)
    plt.savefig("ROC.png")
    plt.show()
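Note that cross_val_predict returns hard class labels by default, so the ROC curve above collapses to a single threshold point. A sketch of the probability-based variant, assuming the same model, data, and kfold as above:

# out-of-fold probabilities give the full threshold-varying ROC curve
y_proba = cross_val_predict(model, train_images, train_labels,
                            cv=kfold, method='predict_proba')
fpr, tpr, thresholds = roc_curve(train_labels, y_proba[:, 1])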
Example No. 3
def gridSearch(inputs_train, output_train):
    model = KerasClassifier(build_fn=create_model, verbose=10)

    # defining grid search parameters
    param_grid = {
        'optimizer': ['RMSprop'],
        'batch_size': [10],
        'epochs': [100],
        #                  'learn_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
        #                  'momentum': [0.0, 0.2, 0.4, 0.6, 0.8, 0.9],
        'init_mode': ['lecun_uniform'],
        'activation': ['softmax'],
        'weight_constraint': [1],
        'dropout_rate': [0.0, 0.5, 0.9],
        'neurons': [10, 30]
    }
    grid = GridSearchCV(estimator=model,
                        param_grid=param_grid,
                        cv=3,
                        verbose=10)
    grid_result = grid.fit(inputs_train, output_train)

    # summarize results
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))

    return grid.best_params_, grid.best_score_
Example No. 4
def grid_search(layers_list, epochs_list, X_train, Y_train, indim=300):
    tup_layers = tuple([tuple(l) for l in layers_list])
    tup_epochs = tuple(epochs_list)

    model = KerasClassifier(build_fn=create_model,
                            verbose=0)  #use our create_model

    # define the grid search parameters
    batch_size = [1]  #starting with just a few choices
    epochs = tup_epochs
    lyrs = tup_layers

    #use this to override our defaults. keys must match create_model args
    param_grid = dict(batch_size=batch_size,
                      epochs=epochs,
                      input_dim=[indim],
                      lyrs=lyrs)

    # build the search grid
    grid = GridSearchCV(
        estimator=model,  #we created model above
        param_grid=param_grid,
        cv=3,  #use 3 folds for cross-validation
        verbose=2)  # include n_jobs=-1 if you are using CPU

    grid_result = grid.fit(np.array(X_train), np.array(Y_train))

    # summarize results
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
Example No. 5
def train_gender(X, Y, X_test, train=True, epoch=10, batch_size=1024):
    # convert the classes to 0 and 1
    encoder = LabelEncoder()
    encoder.fit(Y)
    Y_encoded = encoder.transform(Y)
    if train:
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model = create_gender_model()
        model.fit(X,
                  Y_encoded,
                  validation_split=0.1,
                  batch_size=batch_size,
                  epochs=epoch)

        X_test = scaler.transform(X_test)
        y_pre = model.predict(X_test)
        threshold = 0.5
        y_pred_gender = np.where(y_pre > threshold, 1, 0)
        return y_pred_gender
    else:
        estimators = []
        estimators.append(('standardize', StandardScaler()))
        estimators.append(('mlp',
                           KerasClassifier(build_fn=create_gender_model,
                                           epochs=epoch,
                                           batch_size=batch_size,
                                           verbose=0)))
        pipeline = Pipeline(estimators)
        kfold = StratifiedKFold(n_splits=5, shuffle=True)
        results = cross_val_score(pipeline, X, Y_encoded, cv=kfold)
        print("Baseline: %.2f%% (%.2f%%)" %
              (results.mean() * 100, results.std() * 100))
Example No. 6
    def fit(self, X, y, **kwargs):
        """
        Fit the workflow by building the word corpus, and fitting the keras model.
    
        Parameters
        ----------
        X : array-like, iterable
            Collection of str or an iterable which yields str
        y : array-like, shape (n_samples,)
            Class targets.
        **kwargs : 
            parameters passed to inner keras model
    
        Returns
        -------
        self : object
            Returns an instance of self.
        """

        x = self.text2seq.fit_transform(X)
        y_enc = to_categorical(y, self.num_labels)

        self.model_ = KerasClassifier(build_fn=self.factory.create_model,
                                      dictionary_size=self.text2seq.dictionary_size_,
                                      num_labels=self.num_labels)

        self.model_.fit(x, y_enc, **kwargs)

        return self
Example No. 7
def main():
    args = parse_option()
    print(args)


    x_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
    x_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
    class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
    x_train = x_train / 255.0
    x_test = x_test / 255.0
    y_train = keras.utils.to_categorical(y_train)
    y_test = keras.utils.to_categorical(y_test)
    num_classes = 10


    print("Grid search for batch_size,batch norm and learning rate")
    model = KerasClassifier(build_fn=build_model,,epochs=40,verbose=1)
    batch_size = [32,64,128]
    lr = [0.01,0.001]
    use_bn = [True,False]
    param_grid = dict(batch_size=batch_size, lr=lr,use_bn=use_bn)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
    grid_result = grid.fit(x_train, y_train)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
Example No. 8
    def test_LSTM_compilation_and_fit_predict_without_execution_error(self):
        # given
        x_train = np.array(
            ['this is really awesome !', 'this is really crap !!'])
        y_train = np.array([1, 0])

        ids_x_train = np.empty([2, 5])
        for i in range(0, len(x_train)):
            ids = [
                dummy_hash_function(token) for token in x_train[i].split(' ')
            ]
            ids_x_train[i, :] = ids
        num_labels = 2
        y_enc = to_categorical(y_train, num_labels)
        dictionary_size = int(np.max(ids_x_train) + 1)  # the np.int alias was removed from NumPy

        # when
        lstm_factory = LSTMFactory()
        clf_keras = KerasClassifier(build_fn=lstm_factory.create_model,
                                    dictionary_size=dictionary_size,
                                    num_labels=num_labels)
        clf_keras.fit(ids_x_train, y_enc, epochs=1, verbose=False)

        x_test = np.array(['it is really awesome !'])
        ids_x_test = np.empty([1, 5])
        ids_x_test[0, :] = [
            dummy_hash_function(token) for token in x_test[0].split(' ')
        ]

        y_pred = clf_keras.predict(ids_x_test)

        # then
        self.assertIsNotNone(y_pred)
Example No. 9
def cross_val_nn():
    print('Cross validation initializing')
    X, y = data_time()
    mod = KerasClassifier(build_fn=phish_nn,
                          epochs=15,
                          batch_size=1500,
                          verbose=0)
    num_folds = 5
    kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=808)
    print('Starting 5-fold cross validation of model')
    cv_results = cross_val_score(mod, X, y, cv=kfold)
    print('Starting 5-fold cross-validation predictions')
    cv_preds = cross_val_predict(mod,
                                 X,
                                 y,
                                 cv=kfold,
                                 verbose=0,
                                 method='predict')
    print('The average cross-validation accuracy is: ',
          round(cv_results.mean() * 100, 2), '%')
    print('The 5-fold cross validation accuracy results are: \n', cv_results)
    acc = accuracy_score(y, cv_preds)
    cm = confusion_matrix(y, cv_preds)
    print('Confusion Matrix \n', cm)
    print('Accuracy Score: \n', acc)
    f1s = f1_score(y, cv_preds)
    print('The F1 score for the cross validated model is: \n', f1s)
    precis = precision_score(y, cv_preds, average='binary')
    rec = recall_score(y, cv_preds, average='binary')
    print('The precision score is: \n', precis)
    print('The recall score is: \n', rec)
    return cm
Example No. 10
def neural_network_classifier(X_train, X_test, t_train, t_test):
    # Function to create model, required for KerasClassifier
    def create_model():
        # create model
        model = keras.Sequential()
        model.add(layers.Dense(16, input_shape=(4,), activation='tanh'))
        model.add(layers.Dense(8, activation='tanh'))
        model.add(layers.Dense(4, activation='tanh'))
        model.add(layers.Dense(3, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam', metrics=['accuracy'])
        return model

    nnclassifier = KerasClassifier(build_fn=create_model, verbose=0)

    k_fold = StratifiedKFold(
        n_splits=folds, random_state=random_seed, shuffle=True)

    param_grid = {
        "batch_size": [10, 20, 40, 60, 80, 100],
        "epochs": [10, 50, 100]
    }

    gs = GridSearchCV(nnclassifier, param_grid,
                      scoring=scoring, cv=k_fold, n_jobs=-1)
    gs.fit(X_train, t_train)

    print(gs.best_params_)
    use_model(gs, gs.best_score_, X_train, X_test,
              t_test, 'Neural Net Classification')
Example No. 11
def train_age(X, Y, X_test, train=True, epoch=10, batch_size=1024):
    # convert the classes to integer labels
    encoder = LabelEncoder()
    encoder.fit(Y)
    Y = encoder.transform(Y)
    if train:
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        Y = to_categorical(Y)
        model = create_age_model()
        model.fit(X, Y, batch_size=batch_size, epochs=epoch)

        X_test = scaler.transform(X_test)
        y_pre = model.predict(X_test)
        y_pred_age = np.argmax(y_pre, axis=1)

        return y_pred_age
    else:
        # estimator = KerasClassifier(
        #     build_fn=create_gender_model, epochs=epoch, batch_size=batch_size, verbose=0)
        estimators = []
        estimators.append(('standardize', StandardScaler()))
        estimators.append(('mlp',
                           KerasClassifier(build_fn=create_age_model,
                                           epochs=epoch,
                                           batch_size=batch_size,
                                           verbose=0)))
        pipeline = Pipeline(estimators)
        kfold = StratifiedKFold(n_splits=10, shuffle=True)
        results = cross_val_score(pipeline, X, Y, cv=kfold)
        print("Baseline: %.2f%% (%.2f%%)" %
              (results.mean() * 100, results.std() * 100))
Example No. 12
    def fit(self, X, y):

        # check if TensorFlow is available
        _check_tensorflow()

        sequence_length = X.shape[1]
        if self._model is None or sequence_length != self.sequence_length:
            self.sequence_length = sequence_length
            keras_model = _create_lstm_pool_model(
                embedding_matrix=self.embedding_matrix,
                backwards=self.backwards,
                dropout=self.dropout,
                optimizer=self.optimizer,
                max_sequence_length=sequence_length,
                lstm_out_width=self.lstm_out_width,
                learn_rate=self.learn_rate,
                verbose=self.verbose)
            self._model = KerasClassifier(keras_model, verbose=self.verbose)
        self._model.fit(X,
                        y,
                        batch_size=self.batch_size,
                        epochs=self.epochs,
                        shuffle=self.shuffle,
                        class_weight=self.class_weight,
                        verbose=self.verbose)
Example No. 13
def run_gridSearch(X_train, y_train):
    # Tuning the model: Grid Search method
    # Method to tune and test different combinations of parameters/hyperparameters

    # Wrap the model; build_fn must be the factory itself, not the model
    # returned by calling it
    wrappedClassifier = KerasClassifier(
        build_fn=build_model, dropout_rate=0.2, optimizer='adam')

    # Create parameters dictionary (keys must match build_model's arguments)
    params = {
        'batch_size': [10, 20],
        'epochs': [15, 20],
        'optimizer': ['adam', 'nadam']
    }

    gridSearch = GridSearchCV(estimator=wrappedClassifier,
                              param_grid=params,
                              scoring='accuracy',
                              cv=7)

    gridSearch = gridSearch.fit(X_train, y_train)

    best_param = gridSearch.best_params_
    best_accuracy = gridSearch.best_score_

    return best_param, best_accuracy
Example No. 14
    def __init__(self, vocab_size, create_model_callback, title, layers,
                 isFinal, existentModel):
        print("""\n\nRNN PARAMETERS
 _________________________________
 vocab_size:    {}
 title:         {}
 layers:        {}
 isFinal:       {}
 existentModel: {}
 \n\n""".format(vocab_size, title, layers, isFinal, existentModel))
        # val_recall should be maximized; mode='min' (left over from
        # monitoring "val_loss") would stop at the worst epoch
        metric_monitor = "val_recall"  # "val_loss"
        self.callbacks = [
            EarlyStopping(monitor=metric_monitor, mode='max', verbose=1),
            ModelCheckpoint("{}_checkpoint_model.h5".format(title),
                            monitor=metric_monitor,
                            mode='max',
                            save_best_only=True,
                            verbose=1),
            CSVLogger('{}_train_callback_log.txt'.format(title))
        ]
        MLModel.__init__(self,
                         model=KerasClassifier(
                             build_fn=create_model_callback,
                             epochs=50,
                             batch_size=10,
                             verbose=2,
                             vocab_size=vocab_size,
                             hidden_dims=layers,
                         ),
                         param_grid=getRNNGrid(),
                         title=title,
                         isFinal=isFinal,
                         existentModel=existentModel)
Example No. 15
    def build_nn_classifier(self, X, y, params):
        return KerasClassifier(self.build_classifier,
                               input_dim=X.shape[1],
                               epochs=200,
                               batch_size=8,
                               verbose=1 if self.verbose else 0,
                               **params)
Example No. 16
def get_keras_classifier_pipeline(data):
    """
    Keras Classifier: https://www.tensorflow.org/api_docs/python/tf/keras/wrappers/scikit_learn/KerasClassifier
    """

    # Infer the feature vector size by passing a sample through the pipeline to transform it
    if config.INFER_KERAS_INPUT_SHAPE:
        spy = utils.Pipeline_Spy()
        pipeline = create_classifier_pipeline(spy, data)

        # Need to represent the single data sample as a 1 by num_features array, not a 1-dimensional vector num_features long
        data_sample = np.array(data.iloc[1, :])[np.newaxis, ...]
        print("Original data shape: {}".format(data_sample.shape))

        feature_vector_transformed = pipeline.fit_transform(data_sample)[0]
        print("Transformed data shape: {}".format(
            feature_vector_transformed.shape))

        feature_vector_input_length = len(feature_vector_transformed)

        print("Inferred feature vector length for Keras model: {}".format(
            feature_vector_input_length))
    else:
        feature_vector_input_length = config.KERAS_INPUT_SHAPE

    clf = KerasClassifier(build_fn=create_keras_model,
                          input_dim=feature_vector_input_length,
                          epochs=150,
                          batch_size=32)

    return create_classifier_pipeline(clf, data)
Example No. 17
def main():
    ''' Main function '''
    # Load data
    x_train, y_train, x_test, y_test, test_img_idx = prepare_data()

    MCBN_model = KerasClassifier(build_fn=create_model,
                                 epochs=15,
                                 batch_size=32,
                                 verbose=0)

    print("Start fitting monte carlo batch_normalization model")

    X = x_train[0:int(TRAIN_VAL_SPLIT * len(x_train))]
    X = X.astype('float32')
    Y = y_train[0:int(TRAIN_VAL_SPLIT * len(x_train))]
    X /= 255

    # define the grid search parameters
    optimizer = [
        'SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'
    ]
    param_grid = dict(optimizer=optimizer)
    grid = GridSearchCV(estimator=MCBN_model,
                        param_grid=param_grid,
                        n_jobs=-1,
                        cv=3)
    grid_result = grid.fit(X, Y)
    # summarize results
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
Example No. 18
def gridSearch(inputs_train, output_train):
    model = KerasClassifier(build_fn=create_model, verbose=0)

    # defining grid search parameters
    param_grid = {
        'optimizer': ['SGD', 'RMSprop', 'Adam'],  # best: SGD; also tried 'Adagrad', 'Adadelta', 'Adamax', 'Nadam'
        'batch_size': [10, 100, 500],  # best: 10
        'epochs': [100, 1000],  # best: 100
        #                  'learn_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
        #                  'momentum': [0.0, 0.2, 0.4, 0.6, 0.8, 0.9],
        #                  'init_mode': ['uniform','normal'], #, 'zero', 'lecun_uniform',, 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'
        #                  'activation': ['softmax','relu','sigmoid'], #, 'softplus', 'softsign', , 'tanh', , 'hard_sigmoid', 'linear'
        #                 # 'weight_constraint': [1, 3, 5],
        #                  'dropout_rate': [0.0, 0.9], #, 0.5
        #                  'neurons': [25, 50] #10,
    }
    grid = GridSearchCV(estimator=model,
                        param_grid=param_grid,
                        cv=3,
                        verbose=10)
    grid_result = grid.fit(inputs_train, output_train)

    # summarize results
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    return grid.best_params_, grid.best_score_
Example No. 19
    def build_nn_classifier(self, X, y, params):
        return KerasClassifier(build_fn=self.nn_classifier_model,
                               input_dim=X.shape[1],
                               num_classes=len(np.unique(y)),
                               epochs=200,
                               batch_size=8,
                               verbose=1 if self.verbose else 0,
                               **params)
Example No. 20
def run(dataName, foldId, imputationMethod, proposedMethod):
    
    EPOCHS = 2000
    NUMBER_OF_CV_FOLDS = 10
    ALL_WEIGHT_REG_CANDIDATES = [1.0, 0.1, 0.01, 0.001, 0.0001]
    ALL_TRANSFORM_REG_CANDIDATES = [1.0, 0.1, 0.01, 0.001, 0.0]
    
    # ALL_WEIGHT_REG_CANDIDATES = [1.0]
    # ALL_TRANSFORM_REG_CANDIDATES = [1.0]
    
    NR_JOBS = 1


    if proposedMethod:
        createModel = createModelProposed
    else:
        createModel = createModelLogReg
    
    modelForCV = KerasClassifier(build_fn=createModel, epochs=EPOCHS, verbose=True)
    
    trainData, trainLabels, unlabeledData, testData, testLabels = realdata.loadSubset(dataName, None, foldId, imputationMethod)

    parameters = {"classWeightRegularizer" : ALL_WEIGHT_REG_CANDIDATES, "transformWeightRegularizer" : ALL_TRANSFORM_REG_CANDIDATES}
    gridSearchObj = sklearn.model_selection.GridSearchCV(
        modelForCV, parameters, scoring=myScorer, cv=NUMBER_OF_CV_FOLDS,
        n_jobs=NR_JOBS, return_train_score=True)  # train scores are read below
    gridSearchObj.fit(trainData, trainLabels)
    
    cvResult = pandas.DataFrame.from_dict(gridSearchObj.cv_results_)
    meanScoresEval = cvResult["mean_test_score"].to_numpy()  # .as_matrix() was removed from pandas
    bestId = numpy.argmax(meanScoresEval)
    
    bestWeightParam = cvResult.loc[bestId, "param_classWeightRegularizer"]
    bestTransformParam = cvResult.loc[bestId, "param_transformWeightRegularizer"]
    meanScoresTrain = cvResult["mean_train_score"].to_numpy()
    
    
    finalModel = createModel(transformWeightRegularizer=bestTransformParam, classWeightRegularizer=bestWeightParam)
    finalModel.fit(trainData, trainLabels, epochs=EPOCHS, verbose=True)
    aucTest, logLikelihood = evaluation.eval_NN(finalModel, testData, testLabels)
    
#     print("bestWeightParam = ")
#     print(bestWeightParam)
#     print("meanScores = ")
#     print(meanScores)
#     print("TRAIN DATA:")
#     auc, logLikelihood = evaluation.eval_NN(finalModel, trainData, trainLabels)
#     print("auc = ", auc)
#     print("logLikelihood = ", logLikelihood)
         
        
    # print("TEST DATA:")
    # print("auc = ", aucTest)
    # print("logLikelihood = ", logLikelihood)
    
#     print("average training score = ", meanScoresTrain[bestId])
#     print("average eval score = ", meanScoresEval[bestId])
#     print("test score = ", logLikelihood)
    
    return logLikelihood, meanScoresEval[bestId], meanScoresTrain[bestId], aucTest, bestWeightParam, bestTransformParam
Example No. 21
def make_nn_voting_classifier(build_func,
                              params_grid,
                              n_iter,
                              random_state,
                              X_train,
                              y_train,
                              X_val,
                              y_val,
                              early_stopping,
                              epochs=100,
                              shuffle=False,
                              verbose=1,
                              n_best_models=5,
                              voting='soft'):
    ''' make a voting classifier from the n best neural-network models, using the Keras wrappers for the scikit-learn API '''

    models = []
    scoring_list = []

    # list of randomly generated parameter dicts
    params_list = list(ParameterSampler(params_grid, n_iter, random_state))

    for params in params_list:

        # set parameters for model
        model = KerasClassifier(build_fn=build_func,
                                validation_data=(X_val, y_val),
                                input_shape=X_train.shape[1:],
                                epochs=epochs,
                                shuffle=shuffle,
                                callbacks=[early_stopping],
                                verbose=0,
                                **params)

        model_name = f'{model.__class__.__name__}{params}'
        model.fit(X_train, y_train)

        score_train = accuracy_score(model.predict(X_train), y_train)
        score_valid = accuracy_score(model.predict(X_val), y_val)

        scoring_list.append(score_valid)
        models.append((model_name, model))

        if verbose == 1:
            print(model_name)
            print(
                f'Accuracy score on training set: {score_train.round(4)} | Accuracy score on validation set: {score_valid.round(4)}'
            )
            print('-' * 100)

    # select the best models by validation score; keep a plain list of
    # (name, model) tuples, which is what VotingClassifier expects
    best_models_index = np.argsort(scoring_list)[::-1][:n_best_models]
    best_models = [models[i] for i in best_models_index]

    # make voting classifier using best models
    voting_clf = VotingClassifier(estimators=best_models, voting=voting)

    return voting_clf
Example No. 22
    def fit(self, X_raw, y_raw):
        """Classifier training function.

        Here you will implement the training function for your classifier.

        Parameters
        ----------
        X_raw : numpy.ndarray
            A numpy array, this is the raw data as downloaded
        y_raw : numpy.ndarray (optional)
            A one dimensional numpy array, this is the binary target variable

        Returns
        -------
        ?
        """
        X_clean = self._preprocessor(X_raw)
        weights = class_weight.compute_class_weight(
            'balanced', classes=np.unique(y_raw), y=y_raw)
        # Keras expects a {class_index: weight} dict, not a bare array
        class_weights = dict(enumerate(weights))
        es = EarlyStopping(monitor='val_loss',
                           min_delta=0.0001,
                           patience=5,
                           mode='auto',
                           restore_best_weights=True)

        # ARCHITECTURE OF OUR MODEL

        def make_nn(hidden_layers=[7, 7], lrate=0.001):
            sgd = optimizers.SGD(lr=lrate)
            adam = optimizers.Adam(lr=lrate)
            he_init = he_normal()
            model = Sequential()
            for k in hidden_layers:
                model.add(
                    Dense(k,
                          activation='relu',
                          kernel_initializer=he_init,
                          bias_initializer='zeros'))
            model.add(
                Dense(1,
                      activation='sigmoid',
                      kernel_initializer=he_init,
                      bias_initializer='zeros'))
            model.compile(loss='binary_crossentropy', optimizer=adam)
            return model

        self.base_classifier = KerasClassifier(make_nn,
                                               class_weight=class_weights,
                                               epochs=350,
                                               validation_split=0.1,
                                               batch_size=32,
                                               callbacks=[es])

        # THE FOLLOWING GETS CALLED IF YOU WISH TO CALIBRATE YOUR PROBABILITIES

        self.base_classifier.fit(X_clean, y_raw)
        return self.base_classifier
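The comment above mentions calibrating probabilities, but the snippet stops at fitting. One hedged way to add that step with scikit-learn (an assumption, not part of the original class; clf stands for the fitted base classifier):

from sklearn.calibration import CalibratedClassifierCV

# refits clones of clf on 3 folds and learns a sigmoid mapping on top
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
calibrated_clf.fit(X_clean, y_raw)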
Example No. 23
def classifier(X_train, y_train, X_test, X_params_select, y_params_select,
               tune_size, config_list, opt_modulo_params):
    ###
    # Hyperparameterarrays
    ###

    learn_rate = [0.1]
    momentum = [float(x) for x in np.linspace(start=0, stop=0.9, num=4)]
    hidden_layer_n = [int(x) for x in np.linspace(start=10, stop=100, num=4)]

    param_grid = {
        'learn_rate': learn_rate,
        'momentum': momentum,
        'hidden_layer_n': hidden_layer_n,
    }

    ###
    # Training and Fitting
    ###
    early_stopping = EarlyStopping(monitor='accuracy', patience=500)
    model = KerasClassifier(build_fn=create_model, epochs=10000)
    if config_list['optimize_method'] == 1:
        if config_list['randomized_search'] == 1:
            grid = RandomizedSearchCV(estimator=model,
                                      param_distributions=param_grid,
                                      cv=2,
                                      n_iter=25)
        else:
            grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2)

        model = grid.fit(X_params_select,
                         y_params_select,
                         epochs=10000,
                         callbacks=[early_stopping],
                         verbose=0)
        opt_modulo_params = grid.best_params_
        #model = create_model(**opt_modulo_params)
        print(opt_modulo_params)

    else:
        model = create_model()
        model.fit(X_train,
                  y_train,
                  epochs=10000,
                  callbacks=[early_stopping],
                  verbose=0)

    predictions_prob = model.predict(
        X_test
    )  #bugfix: https://datascience.stackexchange.com/questions/13461/how-can-i-get-prediction-for-only-one-instance-in-keras
    print(predictions_prob)
    y_pred = np.where(predictions_prob >= 0.5, 1, -1)
    y_pred = list(itertools.chain(*y_pred))
    y_pred = np.array(y_pred)
    print(y_pred)
    print(type(y_pred))
    return y_pred, opt_modulo_params
Example No. 24
def cross_validation(model_fn, features, target):
    estimator = KerasClassifier(build_fn=model_fn,
                                epochs=10,
                                batch_size=5,
                                verbose=1)
    kfold = KFold(n_splits=10, shuffle=True)
    results = cross_val_score(estimator, features, target, cv=kfold)
    print("Baseline: %.2f%% (%.2f%%)" %
          (results.mean() * 100, results.std() * 100))
Example No. 25
def create_scikit_keras_classifier(X, y):
    # create simple (dummy) Keras DNN model for classification
    batch_size = 500
    epochs = 10
    model_func = create_scikit_keras_model_func(X.shape[1])
    model = KerasClassifier(build_fn=model_func,
                            epochs=epochs,
                            batch_size=batch_size,
                            verbose=1)
    model.fit(X, y)
    return model
Example No. 26
def ede_dnn(dnn_model,
            Xtrain,
            ytrain,
            Xtest,
            ytest,
            batch_size,
            epochs,
            model_dir,
            patience=3,
            factor=0.2,
            export='DNN_y2',
            verbose=0):
    """
    Used to generate a DNN model instance and train it.

    :param dnn_model: Model to be generated
    :param Xtrain: Training input data
    :param ytrain: Training ground truth
    :param Xtest: Testing input data
    :param ytest: Testing ground truth
    :param batch_size: DNN Batch size
    :param epochs: Training Epochs
    :param model_dir: Model directory location
    :param patience: Patience for the early stopping callback
    :param factor: Factor for reducing the learning rate
    :param export: Name used for exporting
    :return: tf.history
    """
    # One-hot encode the ground truth for both training and testing
    y_oh_train = pd.get_dummies(ytrain, prefix='target')
    y_oh_test = pd.get_dummies(ytest, prefix='target')

    early_stopping = EarlyStopping(monitor="loss",
                                   patience=patience)  # early stop patience
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=factor,
                                  patience=5,
                                  min_lr=0.00001)
    model = KerasClassifier(build_fn=dnn_model,
                            verbose=verbose,
                            callbacks=[early_stopping, reduce_lr])
    history = model.fit(np.asarray(Xtrain),
                        np.asarray(y_oh_train),
                        batch_size=batch_size,
                        epochs=epochs,
                        callbacks=[early_stopping, reduce_lr],
                        verbose=0,
                        validation_data=(np.asarray(Xtest),
                                         np.asarray(y_oh_test)))
    # Saving History
    df_history = pd.DataFrame(history.history)
    history_name = "DNN_history_{}.csv".format(export)
    df_history.to_csv(os.path.join(model_dir, history_name), index=False)
    return history
Example No. 27
def fit_network(build_func,
                params,
                X_train,
                y_train,
                X_val,
                y_val,
                early_stopping,
                kind='ann',
                epochs=50,
                shuffle=True,
                verbose=1):
    ''' make a pipeline with feature selection, fit it on the train set and evaluate on the validation set '''

    # shuffle training data in each iteration
    idx = np.arange(len(y_train))
    np.random.shuffle(idx)

    # set parameters for model
    model = KerasClassifier(
        build_fn=build_func,
        # validation_split=0.1,
        validation_split=0.03,
        input_shape=X_train.shape[1:],
        epochs=epochs,
        shuffle=shuffle,
        callbacks=[early_stopping],
        verbose=0,
        **params)
    # give model name
    model_name = f'{model.__class__.__name__}{params}'

    if kind == 'ann':
        Builder = AnnBuilder
    else:
        Builder = RnnBuilder

    # create pipeline
    pipe = Pipeline([('feature_selection',
                      ImportantFeaturesSelector(model, kind)),
                     ('classifier', Builder(model))])

    pipe.fit(X_train[idx], y_train[idx])

    score_train = accuracy_score(pipe.predict(X_train[idx]), y_train[idx])
    score_valid = accuracy_score(pipe.predict(X_val), y_val)

    if verbose == 1:
        print(model_name)
        print(
            f'Accuracy score on training set: {score_train.round(4)} | Accuracy score on validation set: {score_valid.round(4)}'
        )
        print('-' * 127)

    return ((model_name, pipe), score_valid)
Example No. 28
def sklearn_pipeline():
    # wrap the model using the function you created
    clf = KerasClassifier(build_fn=create_model,
                          verbose=0,
                          epochs=100,
                          validation_split=0.2)

    # just create the pipeline
    pipeline = Pipeline([('scaler', StandardScaler()), ('estimator', clf)])

    return pipeline
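The returned pipeline plugs straight into scikit-learn model selection. A usage sketch, assuming a feature matrix X and labels y:

from sklearn.model_selection import cross_val_score

pipeline = sklearn_pipeline()
scores = cross_val_score(pipeline, X, y, cv=5)
print("Mean CV accuracy: %.2f%%" % (scores.mean() * 100))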
Example No. 29
def _neural_1estimator(file_path):

    df = pd.read_csv(file_path)
    hyper_params = {}
    _update_estimator_params(df.columns, hyper_params, df.iloc[0])

    early_stop = EarlyStopping(monitor='loss', patience=3, min_delta=0.001)

    return KerasClassifier(build_nn,
                           epochs=1000,
                           callbacks=[early_stop],
                           **hyper_params), hyper_params
Example No. 30
    def fit(self, X, y):
        # inverse-frequency class weights derived from the one-hot label matrix
        weights = np.mean(np.sum(y, axis=0)) / np.sum(y, axis=0)
        self.dict_weights = dict(enumerate(weights))
        self.classifier = KerasClassifier(
            build_fn=self.neuralNet,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=self.verbose,
            class_weight=self.dict_weights,
        )
        self.classifier.fit(X, y)
        return self
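A quick numeric check of the weighting rule above: with one-hot labels split 90/10 between two classes, the weights come out inversely proportional to class frequency.

import numpy as np

y = np.zeros((100, 2))
y[:90, 0] = 1                       # 90 samples of class 0
y[90:, 1] = 1                       # 10 samples of class 1
counts = np.sum(y, axis=0)          # [90., 10.]
weights = np.mean(counts) / counts  # [0.5555..., 5.0]
print(dict(enumerate(weights)))     # {0: 0.5555..., 1: 5.0}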