class ThresholdStratifiedKFold(object):
    """Stratified K-fold splitter for targets binned by thresholds.

    Every target value is replaced by the number of thresholds it meets or
    exceeds, and that count is used as the class label handed to a wrapped
    ``StratifiedKFold``.

    Parameters
    ----------
    thresholds : scalar or iterable of scalars
        Cut points used to bin the targets. A single non-iterable value is
        wrapped in a one-element list.
    *args, **kwargs
        Forwarded unchanged to the ``StratifiedKFold`` constructor.
    """

    def __init__(self, thresholds, *args, **kwargs):
        if isinstance(thresholds, Iterable):
            self.thresholds = list(thresholds)
        else:
            self.thresholds = [thresholds]
        self.stratified = StratifiedKFold(*args, **kwargs)

    def get_n_splits(self, *args, **kwargs):
        """Return the number of splits reported by the wrapped splitter."""
        return self.stratified.get_n_splits(*args, **kwargs)

    def split(self, X, y, groups=None):
        """Yield ``(train, test)`` pairs stratified on the binned targets.

        Parameters
        ----------
        X : array-like
            Samples; passed through to the wrapped splitter untouched.
        y : array-like
            Targets. Any sequence ``np.asarray`` can convert is accepted,
            not only NumPy arrays.
        groups : optional
            Accepted so the object can be called as
            ``cv.split(X, y, groups)``; forwarded to the wrapped splitter.

        Yields
        ------
        tuple
            The ``(train, test)`` pairs produced by the wrapped splitter.
        """
        # Coerce once so lists and other sequences support `.shape` and the
        # element-wise comparison below.
        targets = np.asarray(y)

        # Bin label = how many thresholds each target reaches.
        y_thresh = np.zeros(targets.shape)
        for thresh in self.thresholds:
            y_thresh += targets >= thresh

        for train, test in self.stratified.split(X, y_thresh, groups=groups):
            yield train, test
def MLKFoldCrossValid(epoch, data_path="pima-indians-diabetes.csv",
                      n_splits=10, seed=7, batch_size=10):
    """Train and evaluate a small dense network with stratified K-fold CV.

    The CSV at ``data_path`` is loaded with ``np.loadtxt``; its first eight
    columns are used as features and the ninth as the label. A fresh model
    is built, compiled and fitted for every fold. The second evaluation
    metric of each fold is printed as a percentage, followed by the mean and
    standard deviation over all folds.

    Parameters
    ----------
    epoch : int
        Number of training epochs per fold.
    data_path : str, optional
        Comma-delimited numeric file to load.
    n_splits : int, optional
        Number of stratified folds.
    seed : int, optional
        Seed applied to NumPy's global RNG and to the fold shuffling.
    batch_size : int, optional
        Mini-batch size passed to ``model.fit``.

    Returns
    -------
    None
        Results are only printed.
    """
    np.random.seed(seed)

    dataset = np.loadtxt(data_path, delimiter=",")
    X = dataset[:, 0:8]
    Y = dataset[:, 8]

    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    for train, test in kfold.split(X, Y):
        # A new model per fold so no weights leak between folds.
        layers = [
            Dense(12, input_dim=8, activation='relu', kernel_initializer='uniform'),
            Dense(8, activation='relu', kernel_initializer='uniform'),
            Dense(1, activation='sigmoid', kernel_initializer='uniform'),
        ]
        model = Sequential(layers)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        model.fit(X[train], Y[train], epochs=epoch, batch_size=batch_size, verbose=0)

        # scores[0] is the loss; scores[1] is the first compiled metric.
        scores = model.evaluate(X[test], Y[test], verbose=0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
        cvscores.append(scores[1] * 100)

    print("%.2f%%(+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
# Get input shape aux = pd.read_csv(data[0]) inputShape = (parameters['dataLength'], len(aux.columns)) config = None if os.path.exists(parameters['modelConfigPath']): with open(parameters['modelConfigPath'], 'r') as configHandler: config = json.load(configHandler) i = 0 # ====================== Script that start training new models with open(parameters['resultFilePath'], 'a+' ) as cvsFileHandler: # where the results for each fold are appended dictWriter = None for trainIndex, testIndex in kf.split(data, classes_for_stratified): if config is not None and config['fold'] > i: print("Pass fold {}".format(i)) i += 1 continue if config is not None and config['epoch'] == parameters[ 'trainingEpochs']: print("Pass fold {}-".format(i)) i += 1 continue print("======== Fold {} ========".format(i)) # If exists a valid config to resume a training if config is not None and config['fold'] == i and config[ 'epoch'] < parameters['trainingEpochs']: epochs = parameters['trainingEpochs'] - config['epoch']
# Column of each data CSV that holds the class labels.
class_label = "organism_resistence"
# For every results file: reload the data set it refers to, rebuild the folds
# and run the stored per-fold classifiers on each test partition.
for result_file in results_file_paths:
    print("========== {} ==========".format(result_file))
    results_df = pd.read_csv(result_file)
    # The data file name is taken from the 'fname' value of the first row.
    data = pd.read_csv('csvs/' + results_df.loc[0]['fname'])
    # Drop the 'Unnamed: 0' column when the CSV contains one.
    if 'Unnamed: 0' in data.columns:
        data = data.drop(columns=['Unnamed: 0'])
    # Separate the labels from the feature columns before preprocessing.
    classes = data[class_label]
    data = data.drop(columns=[class_label])
    data = preprocess(data)
    classes = preprocess_classes(classes)
    # NOTE(review): presumably these settings must match the ones used when
    # the pickled classifiers were trained -- confirm.
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=15)
    results = []
    # Fold number used to select rows of results_df and in the log line.
    # NOTE(review): no increment of `folds` is visible in this excerpt.
    folds = 0
    for train_index, test_index in kf.split(data, classes):
        print("====== Fold {} =====".format(folds))
        # Rows of the results file that belong to the current fold.
        folds_classifiers = results_df[results_df['fold'] == folds]
        data_train, data_test = data.iloc[train_index], data.iloc[test_index]
        # The test partition is normalized with statistics computed on the
        # training partition only.
        mean = data_train.mean()
        std = data_train.std()
        data_test = normalize(data_test, mean, std)
        classes_test = classes[test_index]
        for index, classifier_row in folds_classifiers.iterrows():
            print("====== {} =====".format(classifier_row['classifier']))
            # Pickle path: <fname without extension>_<classifier>_fold<fold>.pkl
            classifier_fname = 'classifiers/{}_{}_fold{}.pkl'.format(
                classifier_row['fname'].split('.')[0],
                classifier_row['classifier'], classifier_row['fold'])
            # NOTE(review): the file object passed to joblib.load is never
            # explicitly closed here.
            classifier = joblib.load(open(classifier_fname, 'rb'))
            # NOTE(review): the handler for this try is not visible in this
            # excerpt.
            try:
                predicted = classifier.predict(data_test)