def get_model(self, optimizer_name):
    if optimizer_name == 'adamax':
        return KerasClassifier(build_fn=model_adamax, epochs=50, verbose=False)
    elif optimizer_name == 'rmsprop':
        return KerasClassifier(build_fn=model_rmsprop, epochs=50, verbose=False)
    elif optimizer_name == 'sgdm':
        return KerasClassifier(build_fn=model_sgdm, epochs=50, verbose=False)
    # fail loudly instead of silently returning None for an unknown name
    raise ValueError('Unknown optimizer: %r' % optimizer_name)
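# A hedged sketch of the pattern above: model_adamax (and its rmsprop/sgdm
# siblings) are not shown in this snippet, so this is an illustrative guess,
# not the source's code. The layer sizes and input width are assumptions;
# only the optimizer string would differ between the three build functions.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def model_adamax():
    model = Sequential([
        Dense(32, activation='relu', input_dim=10),  # assumed input width
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adamax',
                  metrics=['accuracy'])
    return model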
def k_fold_cross_val_score_f1(x, y, get_model, epochs):
    f1_estimator = KerasClassifier(build_fn=get_model, epochs=epochs)
    f1_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
    f1_scores = cross_val_score(f1_estimator, x, y, cv=f1_kfold, scoring='f1')
    print("Average F1-score = %.4f (%.4f)" % (f1_scores.mean(), f1_scores.std()))
    return f1_scores
def k_fold_cross_val_score_accuracy(x, y, get_model, epochs):
    accuracy_estimator = KerasClassifier(build_fn=get_model, epochs=epochs)
    accuracy_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
    accuracies = cross_val_score(accuracy_estimator, x, y, cv=accuracy_kfold)
    print("Average accuracy = %.2f%% (%.2f%%)" % (accuracies.mean() * 100, accuracies.std() * 100))
    return accuracies
def grid_search(TIME_PERIODS, input_shape, num_sensors, num_classes, x_train, y_train_hot):
    # TODO Define the model
    model = KerasClassifier(
        build_fn=models.cnn2,
        time_periods=TIME_PERIODS,
        input_shape=input_shape,
        number_of_sensors=num_sensors,
        number_of_classes=num_classes,
        verbose=0,
        # epochs=50,
        # batch_size=80
    )

    # TODO HERE CHANGE HYPER PARAMETERS:
    # EPOCHS and BATCH_SIZE are expected to be module-level lists of candidate values
    param_grid = dict(epochs=EPOCHS, batch_size=BATCH_SIZE)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=10)
    grid_result = grid.fit(x_train, y_train_hot)

    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
def check_lstm(lstm_model):
    # load data
    _, texts, y = asr.read_data(data_fp)

    # create features and labels
    X, word_index = asr.text_to_features(texts)

    # Load embedding layer.
    embedding = asr.load_embedding(embedding_fp, word_index=word_index)
    embedding_matrix = asr.sample_embedding(embedding, word_index)

    # create the model
    model = KerasClassifier(
        lstm_model(embedding_matrix=embedding_matrix),
        verbose=1,
    )

    fit_kwargs = {"epochs": 2, "batch_size": 2, "class_weight": 20.0}

    # start the review process.
    reviewer = asr.ReviewSimulate(
        X,
        y=y,
        model=model,
        n_instances=1,
        n_queries=1,
        fit_kwargs=fit_kwargs,
        prior_included=[1, 3],  # List of some included papers
        prior_excluded=[2, 4],  # List of some excluded papers
    )
    reviewer.review()

    check_log(reviewer._logger._log_dict)
def grid_search(layers_list, epochs_list, X_train, Y_train, indim=236):
    tup_layers = tuple(tuple(l) for l in layers_list)
    tup_epochs = tuple(epochs_list)

    model = KerasClassifier(build_fn=create_model, verbose=0)  # use our create_model

    # define the grid search parameters
    batch_size = [1]  # starting with just a few choices
    epochs = tup_epochs
    lyrs = tup_layers

    # use this to override our defaults. keys must match create_model args
    param_grid = dict(batch_size=batch_size,
                      epochs=epochs,
                      input_dim=[indim],
                      lyrs=lyrs)

    # build the search grid
    grid = GridSearchCV(estimator=model,  # we created model above
                        param_grid=param_grid,
                        cv=3,  # use 3 folds for cross-validation
                        verbose=2)  # include n_jobs=-1 if you are using CPU

    grid_result = grid.fit(np.array(X_train), np.array(Y_train))

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
def main():
    # hyperparameters
    batch_size = 32
    epochs = 10
    # Maximum number of training samples to use; -1 for no cap. Used to check
    # at which point more data stops improving the model.
    training_data_cap = -1

    input_arr = np.load('../' + input_file)
    output_arr = np.load('../' + output_file)

    if training_data_cap != -1:
        input_arr = input_arr[:training_data_cap]
        output_arr = output_arr[:training_data_cap]

    # evaluate baseline model with standardized dataset
    estimators = [('standardize', StandardScaler()),
                  ('mlp', KerasClassifier(build_fn=baseline_model,
                                          epochs=epochs,
                                          batch_size=batch_size,
                                          verbose=0))]
    pipeline = Pipeline(estimators)
    kfold = StratifiedKFold(n_splits=10, shuffle=True)
    results = cross_val_score(pipeline, input_arr, output_arr, cv=kfold)
    print("Standardized: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
def k_fold_cross_val_score_with_far_frr(x, y, get_model, epochs):
    estimator = KerasClassifier(build_fn=get_model, epochs=epochs)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
    results = cross_val_score(estimator, x, y, cv=kfold)
    print("Average score = %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

    fars, frrs = [], []
    for train, test in kfold.split(x, y):
        # train a fresh model on the training fold before predicting;
        # the original predicted with an untrained model
        model = get_model()
        model.fit(x[train], y[train], epochs=epochs, verbose=0)
        y_pred = model.predict(x[test])

        m = tensorflow.keras.metrics.FalsePositives()
        m.update_state(y[test], y_pred.ravel())
        far = m.result().numpy() / y[test].shape[0]
        print("FAR = ", far)
        fars.append(far)
        m.reset_states()

        m = tensorflow.keras.metrics.FalseNegatives()
        m.update_state(y[test], y_pred.ravel())
        frr = m.result().numpy() / y[test].shape[0]
        print("FRR = ", frr)
        frrs.append(frr)

    fars = numpy.array(fars)
    frrs = numpy.array(frrs)
    print("Average FAR = %.4f (%.4f)" % (fars.mean(), fars.std()))
    print("Average FRR = %.4f (%.4f)" % (frrs.mean(), frrs.std()))
    return results, fars, frrs
def check_lstm(lstm_model, monkeypatch):
    # load data
    as_data = asr.ASReviewData.from_file(data_fp)
    _, texts, _ = as_data.get_data()

    # create features and labels
    X, word_index = asr.text_to_features(texts)

    # Load embedding layer.
    embedding = asr.load_embedding(embedding_fp, word_index=word_index)
    embedding_matrix = asr.sample_embedding(embedding, word_index)

    # create the model
    model = KerasClassifier(
        lstm_model(embedding_matrix=embedding_matrix),
        verbose=1,
    )

    fit_kwargs = {"epochs": 2, "batch_size": 2, "class_weight": 20.0}

    monkeypatch.setattr('builtins.input', lambda _: "0")

    # start the review process.
    reviewer = asr.ReviewOracle(
        X,
        as_data=as_data,
        model=model,
        n_instances=1,
        n_queries=1,
        fit_kwargs=fit_kwargs,
        prior_included=[1, 3],  # List of some included papers
        prior_excluded=[2, 4],  # List of some excluded papers
    )
    reviewer.review()

    check_log(reviewer._logger._log_dict)
def get_best_parameters(self, X_train, y_train):
    model = KerasClassifier(build_fn=self._create_model)
    param_grid = [{
        'init_mode': ['he_normal', 'he_uniform', 'glorot_normal'],
        'activation_mode': ['elu', 'relu'],
        'optimizer': ['adam', 'Nadam']
    }]
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
    grid_search.fit(X_train, y_train)
    print('The best hyperparameters = ', grid_search.best_params_)
def adaboost_run():
    train_path = base_path + '/Dataset/tianci/LCZ/splited/training%d.h5' % case
    test_path = base_path + '/Dataset/tianci/LCZ/splited/test%d.h5' % case
    train_set = h5py.File(train_path, 'r')
    test_set = h5py.File(test_path, 'r')
    out_set = h5py.File(out_path, 'r')

    train_sen1 = np.array(train_set['sen2'])
    train_label = np.array(train_set['label'])
    test_sen1 = np.array(test_set['sen2'])
    test_label = np.array(test_set['label'])
    out_sen = np.array(out_set['sen2'])

    train_sen1 = regtoOne(train_sen1)
    test_sen1 = regtoOne(test_sen1)

    train_sen1 = flatten2(train_sen1)
    test_sen1 = flatten2(test_sen1)
    out_sen = flatten2(out_sen)

    # one-hot labels -> class indices
    train_label = np.argmax(train_label, axis=1)
    test_label = np.argmax(test_label, axis=1)

    ann_model = KerasClassifier(build_fn=get_model2, epochs=10, batch_size=10)
    boosted_ann = AdaBoostClassifier(base_estimator=ann_model)
    boosted_ann.fit(train_sen1, train_label)
    # model.fit(train_sen1, train_label,
    #           batch_size=batch_size, epochs=epoch,
    #           validation_data=(test_sen1, test_label))

    # predict() already returns class indices, so no argmax is needed here
    py = boosted_ann.predict(out_sen)
    print(py)

    # convert class indices back to one-hot rows for the submission file
    res_out = np.zeros((len(py), 17), int)
    res_out[np.arange(len(py)), py] = 1
    print(res_out)
    np.savetxt(result_path, res_out, '%d', delimiter=',')
def build(self, **kwargs):
    """
    Builds and returns estimator.

    Args:
        kwargs (key-value(int)): The user must specify ``input_dim``,
            ``num_samples`` and ``num_classes``.

    Returns:
        `sklearn pipeline` object: pipeline for transforming the features
        and training the estimator
    """
    if 'input_dim' not in kwargs:
        raise ValueError('You need to specify input dimensions when building the model.')
    if 'num_samples' not in kwargs:
        raise ValueError('You need to specify num_samples when building the keras model.')
    if 'num_classes' not in kwargs:
        raise ValueError('You need to specify num_classes when building the keras model.')

    input_dim = kwargs['input_dim']
    num_samples = kwargs['num_samples']
    num_classes = kwargs['num_classes']

    # The arguments of ``build_fn`` are not passed directly. Instead they
    # should be passed as keyword arguments to ``KerasClassifier``.
    estimator = KerasClassifier(build_fn=self._keras_model,
                                num_classes=num_classes,
                                input_dim=input_dim,
                                batch_size=self._hyperparameters['batch_size'],
                                epochs=self._hyperparameters['epochs'])

    # grid = GridSearchCV(estimator=estimator,
    #                     param_grid=self._hyperparameters,
    #                     cv=self._num_cv_folds,
    #                     refit=self._refit,
    #                     verbose=self._verbose)

    return self._create_pipeline(estimator=estimator)
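# A minimal sketch of how the keyword routing above works in the legacy
# Keras scikit-learn wrapper: keywords matching the build_fn signature are
# forwarded to the build function when the model is constructed, while
# keywords matching fit() (epochs, batch_size, ...) go to training. The
# make_mlp builder below is a hypothetical example, not the source's model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

def make_mlp(input_dim, num_classes):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model

# input_dim and num_classes reach make_mlp; epochs and batch_size reach fit()
clf = KerasClassifier(build_fn=make_mlp, input_dim=20, num_classes=3,
                      epochs=5, batch_size=32)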
labels = dataset[:, 4]
print(features)
print(labels)
print(dataset.shape)

encoder = LabelEncoder()
encoder.fit(labels)
encoded_Y = encoder.transform(labels)
print(encoded_Y)
dummy_y = np_utils.to_categorical(encoded_Y)
print(type(dummy_y), dummy_y[:50])


def baseline_model():
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    return model


estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=10, verbose=0)
# renamed from "fiveFold": only three splits are actually used
kfold = KFold(n_splits=3, shuffle=True)
results = cross_val_score(estimator, features, dummy_y, cv=kfold)
print(f"accuracy:{results.mean()}, std:{results.std()}")
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Smaller model
def create_smaller():
    model = Sequential()
    model.add(Dense(30, input_dim=60, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# evaluate model with standardized dataset
keras_baseline = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
standardize = StandardScaler()
pipeline = Pipeline([('scale', standardize), ('keras', keras_baseline)])
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
    model.add(Embedding(max_fatures, embed_dim, input_length=X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # print(model.summary())
    return model


# Label encoding to transform categories into integers
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Test which batch size and epoch combination produces the best accuracy
model = KerasClassifier(build_fn=createmodel, verbose=2)
batch_size = [30, 60, 100]
epochs = [2, 3, 5]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
                           kernel_initializer=initializer,
                           kernel_regularizer=regularizers.l1(0.01),
                           activity_regularizer=regularizers.l2(0.01)),
        keras.layers.Dense(20, activation=tf.nn.softmax)
    ])

    # Model compiling with adam optimizer (which gives an adaptive learning rate)
    opt = optimizers.Adam(lr=0.001)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


neural_network = KerasClassifier(build_fn=create_network, epochs=25)

# pred_labels = cross_val_predict(neural_network, train_data, train_labels, cv=3)
# conf_matrix = confusion_matrix(train_labels, pred_labels)
#
# plot_cells_matrix = []
# for i in range(conf_matrix.shape[0]):
#     line = []
#     for j in range(conf_matrix.shape[1]):
#         line.append(str(conf_matrix[i][j]))
#     plot_cells_matrix.append(line)
#
# plt.figure()
# tb = plt.table(cellText=plot_cells_matrix, loc=(0, 0), cellLoc='center')
# ax = plt.gca()
# ax.set_xticks([])
# ax.set_yticks([])
data = load_watch()
X = data['X']
y = data['y']

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# create a segment learning pipeline
pipe = Pype([('seg', SegmentX(width=100, step=100, order='C')),
             ('crnn', KerasClassifier(build_fn=crnn_model,
                                      epochs=4,
                                      batch_size=256,
                                      verbose=0,
                                      validation_split=0.2))])

##############################################
# Accessing training history
##############################################
# This is a bit of a hack, because the history object is returned by the
# keras wrapper when fit is called. This approach won't work with a more
# complex estimator pipeline, in which case a callable class with the
# desired properties should be made and passed to build_fn (see the sketch
# after this snippet).

pipe.fit(X_train, y_train)
print(DataFrame(pipe.history.history))

ac_train = pipe.history.history['accuracy']
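# A minimal sketch (not from the source) of the callable-class idea mentioned
# in the comment above: build_fn may be any callable, including a class
# instance, so the instance can keep a handle on the model it builds and
# expose the training history afterwards. ModelFactory and its layer sizes
# are assumptions for illustration.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

class ModelFactory:
    """Callable build_fn that keeps a reference to the model it builds."""

    def __init__(self, n_features, n_classes):
        self.n_features = n_features
        self.n_classes = n_classes
        self.model = None

    def __call__(self):
        self.model = Sequential([
            Dense(32, activation='relu', input_dim=self.n_features),
            Dense(self.n_classes, activation='softmax'),
        ])
        self.model.compile(loss='sparse_categorical_crossentropy',
                           optimizer='adam', metrics=['accuracy'])
        return self.model

factory = ModelFactory(n_features=6, n_classes=7)
clf = KerasClassifier(build_fn=factory, epochs=2, verbose=0)
# after clf.fit(...), factory.model.history.history holds the training curves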
le = preprocessing.LabelEncoder()
le.fit(y_cat)
y = encode(le, y_cat)


def base_line_model():
    model = Sequential()
    model.add(Dense(20, input_dim=emsize, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(20, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(cat_number, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


estimator = KerasClassifier(build_fn=base_line_model, epochs=1000, verbose=1)
kfold = KFold(n_splits=3, shuffle=True, random_state=seed)
results = cross_val_score(estimator, x, y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
                   input_dim=13))
    classifier.add(Dropout(0.0))
    classifier.add(Dense(40, activation='sigmoid', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.0))
    classifier.add(Dense(4, activation='sigmoid', kernel_initializer='random_normal'))
    optimizer = optimizers.SGD(lr=0.7, momentum=0.9)
    classifier.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return classifier


# create Model
model = KerasClassifier(build_fn=createModel, batch_size=1, epochs=1600, verbose=0)

# define the grid search parameters
losses = ['binary_crossentropy', 'mean_squared_error', 'mean_absolute_error']
# neurons = [30, 40, 45, 50, 55, 60]
# dropout_rates = [0.0, 0.2, 0.4, 0.6, 0.9]
# activations = ['softmax', 'relu', 'tanh', 'sigmoid', 'linear']
# init_mode = ['random_normal', 'zeros', 'ones']
# batch_size = [1, 8, 16, 32]
# epochs = [1000, 1200, 1400, 1600]
# learning_rate = [0.1, 0.3, 0.5, 0.7, 0.9]
# momentums = [0.0, 0.2, 0.4, 0.6, 0.9]
param_grid = dict(loss=losses)
# batch_size=batch_size, epochs=epochs
# learn_rate=learning_rate, momentum=momentums
model.fit(X_train, y_train, epochs=100, batch_size=10)

scores = model.evaluate(X_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

# compute the predictions
predictions = model.predict(X_test)
rounded = [round(x[0]) for x in predictions]
accuracy = accuracy_score(y_test, rounded)

print('\nClassification report:\n', classification_report(y_test, rounded))
print('\nConfusion matrix:\n', confusion_matrix(y_test, rounded))

model1 = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
print(model1)

probs = model.predict_proba(X_test)
# keep probabilities for the positive outcome only
probs = probs[:, 0]
auc = roc_auc_score(y_test, probs)
print('AUC: %.3f' % auc)
# calculate roc curve
                                                  random_state=SEED,
                                                  stratify=le_labels)
X_train, X_test = X_train / 255.0, X_test / 255.0

train = False
if train:
    batch_size = 512
    epochs = 75
    param_grid = dict(init_mode=['he_normal', 'he_uniform'],
                      activation=['relu', 'softplus'],
                      lr=[0.001, 0.005, 0.01, 0.05, 0.1],
                      optimizer=[Adagrad, Adam, Nadam],
                      layers_num=[3, 4, 5])

    # Create a neural network wrapper for grid search
    model_CV = KerasClassifier(build_fn=create_cnn_model, epochs=epochs,
                               batch_size=batch_size, verbose=1)
    stopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=8, verbose=1)
    grid = GridSearchCV(estimator=model_CV, param_grid=param_grid,
                        scoring='accuracy', n_jobs=-1, cv=3, verbose=3)
    grid_result = grid.fit(X_train, y_train, validation_split=0.2, callbacks=[stopper])

    # Save the best model
    grid_result.best_estimator_.model.save('cnn_model.pkl')

    y_pred = grid_result.predict(X_test)
    show_score(grid_result, X_test, y_test)
    inference('dataset', y_pred, y_test, le)
else:
    # The accuracy is suspiciously high, and I'm sure it's not due to a
    # well-trained network. I trained the networks on Colab; in theory,
    # random_state should reproduce the same data split, but judging by the
    # accuracy, training-set data leaked into the test set when the model
    # was loaded.
    model = load_model('cnn_model.pkl')
class MlpKerasWrapper:

    def __init__(self, feature_name, epochs, batch_size):
        self.epochs = epochs
        self.batch_size = batch_size
        self.keras_clf = None
        self.input_shape = None
        self.feature_name = feature_name

    def model_builder(self, input_shape_length, layer1, layer2, activation, dropout):
        """
        Initialize mlp

        :param input_shape_length: the dimension of the input
        :param layer1: first layer neurons. default to 300
        :param layer2: second layer neurons. default to 50 (try 100)
        :param activation: activation function
        :param dropout: the dropout rate
        """
        self.input_shape = (input_shape_length, )

        # Create the model
        model = models.Sequential()
        model.add(Dense(layer1, input_shape=self.input_shape))
        model.add(BatchNormalization())
        model.add(Dropout(dropout))
        model.add(Dense(layer2, activation=activation))
        model.add(BatchNormalization())
        model.add(Dropout(dropout))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def create_model(self, input_shape_length, layer1, layer2, activation, dropout):
        self.keras_clf = KerasClassifier(self.model_builder,
                                         input_shape_length=input_shape_length,
                                         layer1=layer1,
                                         layer2=layer2,
                                         activation=activation,
                                         dropout=dropout)

    def fit(self, x_train, y_train, grid_search, class_weight):
        print('Training MLP on', self.feature_name)
        if grid_search:
            activation = ['tanh']
            dropout = [0.3, 0.5]
            layer1 = [100, 300, 500]
            layer2 = [50, 100, 150]
            param_grid = dict(layer1=layer1,
                              layer2=layer2,
                              activation=activation,
                              dropout=dropout)
            grid = GridSearchCV(estimator=self.keras_clf,
                                param_grid=param_grid,
                                cv=5,
                                return_train_score=False)
            grid_result = grid.fit(x_train, y_train,
                                   epochs=self.epochs,
                                   batch_size=self.batch_size,
                                   verbose=0,
                                   class_weight=class_weight)
            # Summarize results
            print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
            self.keras_clf = grid.best_estimator_
        else:
            # Normal fit
            self.keras_clf.fit(x_train, y_train,
                               epochs=self.epochs,
                               batch_size=self.batch_size,
                               verbose=0)
        y_pred = self.keras_clf.predict(x_train)
        print('MLP accuracy on train for {}:'.format(self.feature_name),
              accuracy_score(y_train, y_pred))

    def evaluate(self, x_test, y_test):
        y_pred = self.keras_clf.predict(x_test)
        print('MLP performance on test for', self.feature_name)
        print('Accuracy:', accuracy_score(y_test, y_pred),
              'Precision:', precision_score(y_test, y_pred),
              'Recall:', recall_score(y_test, y_pred))

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        cm_display = ConfusionMatrixDisplay(cm)

        # Precision recall
        precision, recall, _ = precision_recall_curve(
            y_test, y_pred, pos_label=self.keras_clf.classes_[1])
        pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)

        # Roc
        fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=self.keras_clf.classes_[1])
        roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)

        # Figure
        figure: Figure = plt.figure(1, figsize=(15, 6))
        figure.suptitle('MLP on {}'.format(self.feature_name), fontsize=20)
        (ax1, ax2, ax3) = figure.subplots(1, 3)
        ax1.set_title('Confusion matrix')
        cm_display.plot(ax=ax1)
        ax2.set_title('Precision recall')
        pr_display.plot(ax=ax2)
        ax3.set_title('Roc curve')
        roc_display.plot(ax=ax3)

        file_name = '{}-mlp.png'.format(self.feature_name)
        figure.savefig(os.path.join(get_folder_path_from_root('images'), file_name))
        plt.show()

    def predict(self, x_test):
        return self.keras_clf.predict(x_test)

    def save_model(self, file_name):
        self.keras_clf.model.save(
            os.path.join(get_folder_path_from_root('models'), file_name))

    def load_model(self, file_name):
        model = models.load_model(get_model_path(file_name))
        self.keras_clf = KerasClassifier(model)
                       metrics=['accuracy'])
history_cnn = sequential_cnn.fit(X_train, Y_train, epochs=10, batch_size=32,
                                 validation_data=(X_test, Y_test))
plt_performance(history_cnn)
# accuracy - 1
# validation accuracy ~ 0.75

# Hyper-parameter Tuning
param_grid = dict(num_filters=[50, 64, 100, 128],
                  dropout=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                  vocab_size=[vocab_size],
                  embedding_dim=[50],
                  maxlen=[100],
                  optimizer=['adam', 'rmsprop'])
sequential_lstm = KerasClassifier(build_fn=build_lstm, epochs=10, batch_size=32, verbose=False)
grid = RandomizedSearchCV(estimator=sequential_lstm, param_distributions=param_grid,
                          cv=10, n_iter=5)
result = grid.fit(X_train, Y_train)
score = grid.score(X_test, Y_test)
print('Optimal parameter values are: ', result.best_params_)
print('Accuracy of the fit is: ', score)
def classifier(epochs=200, batch_size=200):
    return KerasClassifier(build_fn=simple_nn, epochs=epochs, batch_size=batch_size)
    model.add(Dense(n_classes, activation="softmax"))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a segment learning pipeline
pipe = Pype([('seg', SegmentX(width=100, step=100, order='C')),
             ('crnn', KerasClassifier(build_fn=crnn_model,
                                      epochs=1,
                                      batch_size=256,
                                      verbose=0))])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Accuracy score: ", score)

img = mpimg.imread('segments.jpg')
def hyperparameter_training(df_combined):
    train, test = train_test_split(df_combined, test_size=0.2, random_state=0,
                                   stratify=df_combined[['gender', 'actor']])

    X_train = train.iloc[:, 3:]
    y_train = train.iloc[:, :2].drop(columns=['gender'])
    print(X_train.shape)
    print(X_train)
    X_train = X_train.drop(columns=['img_path'])

    X_test = test.iloc[:, 3:]
    y_test = test.iloc[:, :2].drop(columns=['gender'])
    print(X_test.shape)
    X_test = X_test.drop(columns=['img_path'])

    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    # X_test = np.asarray(X_test).astype('float32')
    # X_train = np.asarray(X_train).astype('float32')
    # y_test = np.asarray(y_test).astype('float32')
    # y_train = np.asarray(y_train).astype('float32')

    X_train = X_train[:, :, np.newaxis]
    X_test = X_test[:, :, np.newaxis]

    lb = LabelEncoder()
    y_train = np_utils.to_categorical(lb.fit_transform(y_train))
    # use transform (not fit_transform) so test labels share the train encoding
    y_test = np_utils.to_categorical(lb.transform(y_test))
    y_trainHot = np.argmax(y_train, axis=1)
    print(X_train.shape)

    def create_classifier(optimizer=keras.optimizers.Adam(lr=0.0001)):
        model = tf.keras.Sequential()
        model.add(layers.Conv1D(64, kernel_size=10, activation='relu',
                                input_shape=(X_train.shape[1], 1)))
        model.add(layers.Conv1D(128, kernel_size=10, activation='relu',
                                kernel_regularizer=l2(0.01),
                                bias_regularizer=l2(0.01)))
        model.add(layers.MaxPooling1D(pool_size=8))
        model.add(layers.Dropout(0.4))
        model.add(layers.Conv1D(128, kernel_size=10, activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=8))
        model.add(layers.Dropout(0.4))
        model.add(layers.Flatten())
        model.add(layers.Dense(256, activation='relu'))
        model.add(layers.Dropout(0.4))
        # note: softmax is the conventional choice with categorical_crossentropy
        model.add(layers.Dense(8, activation='sigmoid'))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['accuracy'])
        return model

    classifier = KerasClassifier(build_fn=create_classifier)
    params = {
        'batch_size': [30, 32, 34],
        'epochs': [25, 50, 75],  # 'nb_epoch' was the legacy spelling of this argument
        'optimizer': ['adam', 'SGD'],
    }
    grid_search = GridSearchCV(estimator=classifier, param_grid=params,
                               scoring='accuracy', cv=5)
    grid_search = grid_search.fit(X_train, y_trainHot)
    print(grid_search.best_params_)
    print(grid_search.best_score_)