Ejemplo n.º 1
0
class ThresholdStratifiedKFold(object):
    """Stratified K-fold splitter for a numeric target.

    Each target value is mapped to an ordinal bin by counting how many of the
    configured thresholds it reaches (``y >= threshold``). Those bin indices
    are then used as the labels a wrapped ``StratifiedKFold`` stratifies on.

    Args:
        thresholds: A single cut-off, or an iterable of cut-offs.
        *args: Positional arguments forwarded to ``StratifiedKFold``.
        **kwargs: Keyword arguments forwarded to ``StratifiedKFold``.
    """

    def __init__(self, thresholds, *args, **kwargs):
        # Normalise to a list so split() can always iterate over it.
        if isinstance(thresholds, Iterable):
            self.thresholds = list(thresholds)
        else:
            self.thresholds = [thresholds]
        self.stratified = StratifiedKFold(*args, **kwargs)

    def get_n_splits(self, *args, **kwargs):
        """Return the number of folds reported by the wrapped splitter."""
        return self.stratified.get_n_splits(*args, **kwargs)

    def split(self, X, y, groups=None):
        """Yield ``(train, test)`` index pairs stratified on the binned target.

        Args:
            X: Samples; passed through to the wrapped splitter unchanged.
            y: Target values; anything ``np.asarray`` can convert.
            groups: Accepted so that callers using the three-argument
                ``split(X, y, groups)`` form work with this class; it is
                forwarded to the wrapped splitter.

        Yields:
            The ``(train, test)`` pairs produced by the wrapped splitter.
        """
        # Coerce first so plain lists (no ``.shape``, no element-wise ``>=``)
        # are handled the same way as arrays.
        y = np.asarray(y)
        y_thresh = np.zeros(y.shape)
        for thresh in self.thresholds:
            # Each threshold reached moves the sample one bin up.
            y_thresh += y >= thresh
        for train, test in self.stratified.split(X, y_thresh, groups):
            yield train, test
Ejemplo n.º 2
0
def MLKFoldCrossValid(epoch, data_path="pima-indians-diabetes.csv",
                      n_splits=10, seed=7):
    """Cross-validate a small dense binary classifier with stratified K-fold.

    The CSV is loaded with ``np.loadtxt``; columns 0-7 are used as features
    and column 8 as the label. For every fold a fresh three-layer
    ``Sequential`` model is built, trained on the training indices and
    evaluated on the test indices. Per-fold accuracy and the final
    mean/standard deviation are printed.

    Args:
        epoch: Number of training epochs per fold.
        data_path: Path of the comma-delimited dataset file.
        n_splits: Number of folds.
        seed: Seed for NumPy's global RNG and for the fold shuffling.

    Returns:
        list: The per-fold scores (``scores[1] * 100``) in fold order.
    """
    np.random.seed(seed)
    dataset = np.loadtxt(data_path, delimiter=",")
    X = dataset[:, 0:8]
    Y = dataset[:, 8]
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []

    for train, test in kfold.split(X, Y):
        # Build a new model for every fold so no weights carry over.
        layers = [
            Dense(12, input_dim=8, activation='relu', kernel_initializer='uniform'),
            Dense(8, activation='relu', kernel_initializer='uniform'),
            Dense(1, activation='sigmoid', kernel_initializer='uniform'),
        ]
        model = Sequential(layers)
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(X[train], Y[train], epochs=epoch, batch_size=10, verbose=0)
        # scores[0] is the loss; scores[1] is the single compiled metric.
        scores = model.evaluate(X[test], Y[test], verbose=0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
        cvscores.append(scores[1] * 100)
    print("%.2f%%(+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    return cvscores
# Get input shape: the first data file is read only to count its columns.
# NOTE(review): assumes every file listed in `data` has the same columns — confirm.
aux = pd.read_csv(data[0])
inputShape = (parameters['dataLength'], len(aux.columns))

# Load a previously saved training config if one exists. Its 'fold' and
# 'epoch' entries are consulted below to skip folds and to resume training.
config = None
if os.path.exists(parameters['modelConfigPath']):
    with open(parameters['modelConfigPath'], 'r') as configHandler:
        config = json.load(configHandler)

# Index of the fold the loop below is currently visiting.
i = 0
# ====================== Script that starts training new models
with open(parameters['resultFilePath'], 'a+'
          ) as cvsFileHandler:  # where the results for each fold are appended
    # NOTE(review): presumably created further down, outside the visible lines.
    dictWriter = None
    for trainIndex, testIndex in kf.split(data, classes_for_stratified):
        # Folds that come before the fold recorded in the config are skipped.
        if config is not None and config['fold'] > i:
            print("Pass fold {}".format(i))
            i += 1
            continue
        # Skip when the saved epoch count already equals the configured total.
        # NOTE(review): this test does not compare config['fold'] with i, so
        # unless `config` is changed further down (not visible here) it also
        # skips every later fold — confirm this is intended.
        if config is not None and config['epoch'] == parameters[
                'trainingEpochs']:
            print("Pass fold {}-".format(i))
            i += 1
            continue
        print("======== Fold {} ========".format(i))

        # If a valid config exists for this fold, resume its training and run
        # only the epochs that are still missing.
        if config is not None and config['fold'] == i and config[
                'epoch'] < parameters['trainingEpochs']:
            epochs = parameters['trainingEpochs'] - config['epoch']
# Name of the column that holds the target label; it is used verbatim as a
# column key below, so the spelling has to match the CSV header.
class_label = "organism_resistence"

# For every results file: reload the dataset it refers to, rebuild the folds,
# and load the pickled classifier recorded for each fold.
for result_file in results_file_paths:
    print("========== {} ==========".format(result_file))
    results_df = pd.read_csv(result_file)
    # The dataset file name is taken from the 'fname' field of the first row.
    data = pd.read_csv('csvs/' + results_df.loc[0]['fname'])
    # NOTE(review): 'Unnamed: 0' is presumably a leftover index column from an
    # earlier CSV export — confirm.
    if 'Unnamed: 0' in data.columns:
        data = data.drop(columns=['Unnamed: 0'])
    # Separate the label column from the feature columns.
    classes = data[class_label]
    data = data.drop(columns=[class_label])
    data = preprocess(data)
    classes = preprocess_classes(classes)
    # NOTE(review): presumably the same splitter settings that were used when
    # the classifiers were trained, so the folds line up — confirm.
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=15)
    results = []
    # Fold counter used to select rows of results_df.
    # NOTE(review): not incremented within the visible lines; presumably
    # incremented at the end of the loop body.
    folds = 0
    for train_index, test_index in kf.split(data, classes):
        print("====== Fold {} =====".format(folds))
        # Rows of the results file that belong to the current fold.
        folds_classifiers = results_df[results_df['fold'] == folds]
        data_train, data_test = data.iloc[train_index], data.iloc[test_index]
        # Normalisation statistics come from the training portion only and
        # are applied to the test portion.
        mean = data_train.mean()
        std = data_train.std()
        data_test = normalize(data_test, mean, std)
        # NOTE(review): plain [] indexing is positional only if `classes` is
        # an array or a Series with a default integer index — confirm what
        # preprocess_classes returns.
        classes_test = classes[test_index]
        for index, classifier_row in folds_classifiers.iterrows():
            print("====== {} =====".format(classifier_row['classifier']))
            # Pickle path: <fname up to its first '.'>_<classifier>_fold<fold>.pkl
            classifier_fname = 'classifiers/{}_{}_fold{}.pkl'.format(
                classifier_row['fname'].split('.')[0],
                classifier_row['classifier'], classifier_row['fold'])
            # NOTE(review): the file object opened here is never explicitly
            # closed; joblib.load unpickles data, so only load trusted files.
            classifier = joblib.load(open(classifier_fname, 'rb'))
            try:
                predicted = classifier.predict(data_test)