Example #1
    def test_integration(self):
        """Does most of the operations in the tutorial and uses many of mcfly's functionalities consecutively."""
        X_train, X_val, y_train, y_val = self.generate_random_data_sets()

        num_classes = y_train.shape[1]
        models = modelgen.generate_models(
            X_train.shape,
            number_of_classes=num_classes,
            number_of_models=2,
            model_type='CNN')  # Because CNNs are quick to train.
        histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
            X_train,
            y_train,
            X_val,
            y_val,
            models,
            nr_epochs=5,
            subset_size=150,
            verbose=True,
            outputfile=self.outputfile)
        best_model_index = np.argmax(val_accuracies)
        best_model, best_params, best_model_types = models[best_model_index]
        history = best_model.fit(X_train[:200, :, :],
                                 y_train[:200, :],
                                 epochs=2,
                                 validation_data=(X_val, y_val))
        best_model.save(self.modelfile)
        model_reloaded = load_model(self.modelfile)

        assert os.path.exists(self.outputfile)
        assert os.path.exists(self.modelfile)
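
The helper generate_random_data_sets is not part of this snippet. A minimal sketch of what it might look like, assuming numpy and keras.utils.to_categorical as in the surrounding examples; the method name's arguments and sizes are illustrative only:

    # Hypothetical sketch of the helper the test calls above; it only needs
    # to return random time series with one-hot labels.
    def generate_random_data_sets(self, num_train=300, num_val=100,
                                  num_timesteps=100, num_channels=2,
                                  num_classes=2):
        X_train = np.random.rand(num_train, num_timesteps, num_channels)
        y_train = to_categorical(
            np.random.randint(num_classes, size=num_train), num_classes)
        X_val = np.random.rand(num_val, num_timesteps, num_channels)
        y_val = to_categorical(
            np.random.randint(num_classes, size=num_val), num_classes)
        return X_train, X_val, y_train, y_val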
Example #2
def train_model(model, xtrain, ytrain, xval, yval, epochs, i):

    train_set_size = xtrain.shape[0]
    #print(xtrain.shape)
    #print(ytrain.shape)
    #print(xval.shape)
    #print(yval.shape)
    histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
        xtrain,
        ytrain,
        xval,
        yval,
        model,
        nr_epochs=epochs,
        subset_size=train_set_size,
        verbose=False)

    best_model_index = np.argmax(val_accuracies)
    best_model, best_params, best_model_types = model[best_model_index]
    #logging.info(best_model_index, best_model_types, best_params)

    nr_epochs = epochs
    train_start = time.time()
    history = best_model.fit(xtrain,
                             ytrain,
                             epochs=nr_epochs,
                             validation_data=(xval, yval),
                             verbose=False)
    train_end = time.time()
    train_time = train_end - train_start
    logging.info("TRAINTIME: Training time Iteration " + str(i + 1) + ": " +
                 str(train_time))

    return history, best_model
    def test_train_models_on_samples_with_dataset(self):
        """
        Model should be able to train using a dataset as an input
        """
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(num_samples_train, num_timesteps,
                                 num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))
        batch_size = 20

        data_train = tf.data.Dataset.from_tensor_slices(
            (X_train, y_train)).batch(batch_size)

        data_val = tf.data.Dataset.from_tensor_slices(
            (X_val, y_val)).batch(batch_size)

        custom_settings = get_default_settings()
        model_type = CNN(X_train.shape, 2, **custom_settings)
        hyperparams = model_type.generate_hyperparameters()
        model = model_type.create_model(**hyperparams)
        models = [(model, hyperparams, "CNN")]

        histories, val_metrics, val_losses = \
            find_architecture.train_models_on_samples(
                data_train, None, data_val, None, models,
                nr_epochs=1, subset_size=None, verbose=False,
                outputfile=None, early_stopping_patience='auto',
                batch_size=batch_size)
Example #4
def train_model(model, xtrain, ytrain, xval, yval, epochs):

    train_set_size = xtrain.shape[0]
    #print(xtrain.shape)
    #print(ytrain.shape)
    #print(xval.shape)
    #print(yval.shape)
    histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
        xtrain,
        ytrain,
        xval,
        yval,
        model,
        nr_epochs=epochs,
        subset_size=train_set_size,
        verbose=False)

    best_model_index = np.argmax(val_accuracies)
    best_model, best_params, best_model_types = model[best_model_index]
    #logging.info(best_model_index, best_model_types, best_params)

    nr_epochs = epochs
    history = best_model.fit(xtrain,
                             ytrain,
                             epochs=nr_epochs,
                             validation_data=(xval, yval),
                             verbose=False)

    return history, best_model
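
A minimal usage sketch for the train_model helper above, assuming modelgen.generate_models and the numpy/keras imports from the surrounding snippets; the data shapes are arbitrary:

# Illustrative wiring only: generate candidate models, then hand them to train_model.
X_train = np.random.rand(200, 100, 3)
y_train = to_categorical(np.random.randint(2, size=200), 2)
X_val = np.random.rand(50, 100, 3)
y_val = to_categorical(np.random.randint(2, size=50), 2)

models = modelgen.generate_models(X_train.shape,
                                  number_of_classes=y_train.shape[1],
                                  number_of_models=2)
history, best_model = train_model(models, X_train, y_train, X_val, y_val,
                                  epochs=2)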
Example #5
    def test_train_models_on_samples_with_x_and_y(self):
        """
        Model should be able to train using separated x and y values
        """
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(num_samples_train, num_timesteps,
                                 num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))
        batch_size = 20

        custom_settings = get_default_settings()
        model_type = CNN(X_train.shape, 2, **custom_settings)
        hyperparams = model_type.generate_hyperparameters()
        model = model_type.create_model(**hyperparams)
        models = [(model, hyperparams, "CNN")]

        histories, _, _ = \
            find_architecture.train_models_on_samples(
                X_train, y_train, X_val, y_val, models,
                nr_epochs=1, subset_size=10, verbose=False,
                outputfile=None, early_stopping_patience='auto',
                batch_size=batch_size)
        assert len(histories) == 1
    def test_integration(self):
        """Does most of the operations in the tutorial and uses many of mcfly's functionalities consecutively."""
        X_train, X_val, y_train, y_val = self.generate_random_data_sets()

        num_classes = y_train.shape[1]
        metric = 'accuracy'
        models = modelgen.generate_models(
            X_train.shape,
            number_of_classes=num_classes,
            number_of_models=2,
            metrics=[metric],
            model_type='CNN')  # Because CNNs are quick to train.
        histories, val_accuracies, _ = find_architecture.train_models_on_samples(
            X_train,
            y_train,
            X_val,
            y_val,
            models,
            nr_epochs=5,
            subset_size=150,
            verbose=True,
            outputfile=self.outputfile)
        best_model_index = np.argmax(val_accuracies[metric])
        best_model, _, _ = models[best_model_index]
        _ = best_model.fit(X_train[:200, :, :],
                           y_train[:200, :],
                           epochs=2,
                           validation_data=(X_val, y_val))
        best_model.save(self.modelfile)
        model_reloaded = load_model(self.modelfile)
        assert model_reloaded is not None, "Expected model"  #TODO: check if it's a real model
        assert len(histories) == 2, "Expected two models in histories"
        assert os.path.exists(self.outputfile)
        assert os.path.exists(self.modelfile)
Example #7
    def test_train_models_on_samples_with_generators(self):
        """
        Model should be able to train using a generator as an input
        """
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(num_samples_train, num_timesteps,
                                 num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))
        batch_size = 20

        class DataGenerator(Sequence):
            def __init__(self, x_set, y_set, batch_size):
                self.x, self.y = x_set, y_set
                self.batch_size = batch_size

            def __len__(self):
                return math.ceil(len(self.x) / self.batch_size)

            def __getitem__(self, idx):
                batch_x = self.x[idx * self.batch_size:(idx + 1) *
                                 self.batch_size]
                batch_y = self.y[idx * self.batch_size:(idx + 1) *
                                 self.batch_size]
                return batch_x, batch_y

        data_train = DataGenerator(X_train, y_train, batch_size)
        data_val = DataGenerator(X_val, y_val, batch_size)

        custom_settings = get_default_settings()
        model_type = CNN(X_train.shape, 2, **custom_settings)
        hyperparams = model_type.generate_hyperparameters()
        model = model_type.create_model(**hyperparams)
        models = [(model, hyperparams, "CNN")]

        histories, _, _ = \
            find_architecture.train_models_on_samples(
                data_train, None, data_val, None, models,
                nr_epochs=1, subset_size=None, verbose=False,
                outputfile=None, early_stopping_patience='auto',
                batch_size=batch_size)
        assert len(histories) == 1
Example #8
    def train_models_on_samples_empty(self):
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(num_samples_train, num_timesteps,
                                 num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))

        histories, val_metrics, val_losses = \
            find_architecture.train_models_on_samples(
                X_train, y_train, X_val, y_val, [],
                nr_epochs=1, subset_size=10, verbose=False,
                outputfile=None, early_stopping=False,
                batch_size=20, metric='accuracy')
        assert len(histories) == 0
Example #9
    def train_models_on_samples_empty(self):
        np.random.seed(123)
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(
            num_samples_train,
            num_timesteps,
            num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))

        histories, val_metrics, val_losses = \
            find_architecture.train_models_on_samples(
                X_train, y_train, X_val, y_val, [],
                nr_epochs=1, subset_size=10, verbose=False,
                outputfile=None, early_stopping=False,
                batch_size=20, metric='accuracy')
        assert len(histories) == 0
Example #10
    def test_train_models_on_samples_empty(self):
        np.random.seed(123)
        num_timesteps = 100
        num_channels = 2
        num_samples_train = 5
        num_samples_val = 3
        X_train = np.random.rand(
            num_samples_train,
            num_timesteps,
            num_channels)
        y_train = to_categorical(np.array([0, 0, 1, 1, 1]))
        X_val = np.random.rand(num_samples_val, num_timesteps, num_channels)
        y_val = to_categorical(np.array([0, 1, 1]))

        def run(wf):
            return noodles.run_process(wf, n_processes=4, registry=serial_registry)

        histories, val_metrics, val_losses = \
            find_architecture.train_models_on_samples(
                X_train, y_train, X_val, y_val, [],
                nr_epochs=1, subset_size=10, verbose=False,
                outputfile=None, early_stopping=False,
                batch_size=20, metric='accuracy', use_noodles=run)
        assert len(histories) == 0
for j in range(len(Xs)):
    print('fold ' + str(j))
    models = [(get_fresh_copy(model,
                              params['learning_rate']), params, model_type)
              for model, params, model_type in models]
    X_train, y_train, X_val, y_val = split_train_small_val(Xs,
                                                           ys,
                                                           j,
                                                           trainsize=trainsize,
                                                           valsize=valsize)
    histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
        X_train,
        y_train,
        X_val,
        y_val,
        models,
        nr_epochs=nr_epochs,
        subset_size=subset_size,
        verbose=True,
        outputfile=os.path.join(resultpath, 'experiment' + str(j) + '.json'),
        early_stopping=True)
    histories_list.append(histories)
    val_accuracies_list.append(val_accuracies)
    val_losses_list.append(val_losses)
print(time.time() - t)

# In[6]:

# Read them all back in
import json
model_jsons = []
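
The cell is truncated at this point. A minimal sketch of how the per-fold result files written in the loop above could be read back in, assuming the same 'experiment<j>.json' names:

# Sketch: load each fold's experiment JSON into model_jsons.
for j in range(len(Xs)):
    with open(os.path.join(resultpath, 'experiment' + str(j) + '.json')) as f:
        model_jsons.append(json.load(f))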
Example #12


#what is the fraction of classes in the validation set?
pd.Series(y_val.mean(axis=0), index=labels)


if not os.path.exists(result_path):
    os.makedirs(result_path)



histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train, y_train,
    X_val, y_val,
    models,
    nr_epochs=5,
    subset_size=512,
    verbose=True,
    batch_size=32,
    outputfile=os.path.join(result_path, 'models.json'),
    early_stopping=True)



print('Details of the training process were stored in ',os.path.join(result_path, 'models.json'))



best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print('Model type and parameters of the best model:')
print(best_model_types)
print(best_params)
# In[13]:

# Define directory where the results, e.g. json file, will be stored
resultpath = os.path.join(data_path, '..', 'data/models')
if not os.path.exists(resultpath):
    os.makedirs(resultpath)

# In[14]:

outputfile = os.path.join(resultpath, 'modelcomparison_pamap.json')
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train,
    y_train_binary,
    X_val,
    y_val_binary,
    models,
    nr_epochs=5,
    subset_size=1000,
    verbose=True,
    outputfile=outputfile)
print('Details of the training process were stored in ', outputfile)

# In[15]:

best_model_index = np.argmax(val_accuracies)
best_model, best_params, best_model_types = models[best_model_index]
print('Model type and parameters of the best model:')
print(best_model_types)
print(best_params)

# ## Train the best model on the full dataset
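
The notebook excerpt stops at this heading. A minimal sketch of the step it announces, reusing the variables defined above and mirroring the fit/save pattern of the integration tests; the epoch count and output filename are assumptions:

# Sketch only: retrain the selected model on the full training set and save it.
history = best_model.fit(X_train, y_train_binary,
                         epochs=5,
                         validation_data=(X_val, y_val_binary))
best_model.save(os.path.join(resultpath, 'best_model_pamap.h5'))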
Example #14
models = modelgen.generate_models(X_train.shape,
                                  number_of_classes=num_classes,
                                  number_of_models=number_of_models)

# In[6]:

#what is the fraction of a vs c in the validation set?
y_val.mean(axis=0)

# In[7]:

if not os.path.exists(result_path):
    os.makedirs(result_path)

# In[ ]:

outputfile = os.path.join(result_path, 'modelcomparison.json')
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train,
    y_train,
    X_val,
    y_val,
    models,
    nr_epochs=nr_epochs,
    subset_size=subset_size,
    verbose=True,
    batch_size=batch_size,
    outputfile=outputfile,
    early_stopping=early_stopping)
print('Details of the training process were stored in ', outputfile)

# In[ ]:
def main(argv):
    infile = argv[0]
    outdir = argv[1]

    sleep_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    resultdir = os.path.join(outdir, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    all_data = np.load(infile)
    X = all_data['data']
    y = all_data['labels']
    users = all_data['user']
    dataset = all_data['dataset']
    X = X[dataset == 'UPenn']
    y = y[dataset == 'UPenn']
    num_classes = y.shape[1]

    # Shuffle data
    shuf_idx = np.arange(X.shape[0])
    np.random.shuffle(shuf_idx)
    X = X[shuf_idx]
    y = y[shuf_idx]
    users = [users[i] for i in shuf_idx]

    # Get small subset
    #idx = np.random.randint(X.shape[0],size=10000)
    #X = X[idx]; y = y[idx]; users = [users[i] for i in idx]
    y_lbl = y.argmax(axis=1)
    y_lbl = [sleep_states[i] for i in y_lbl]

    # Use nested cross-validation based on users
    # Outer CV
    outer_cv_splits = 5
    inner_cv_splits = 3
    group_kfold = GroupKFold(n_splits=outer_cv_splits)
    fold = 0
    predictions = []
    for train_indices, test_indices in group_kfold.split(X, y, users):
        fold += 1
        print('Evaluating fold %d' % fold)
        out_X_train = X[train_indices]
        out_y_train = y[train_indices]
        naug_samp = augment(out_X_train,
                            out_y_train,
                            sleep_states,
                            fold=fold,
                            aug_factor=1.5)
        out_X_train = np.memmap('tmp/X_aug_fold'+str(fold)+'.np', dtype='float32', mode='r', \
                                shape=(naug_samp,out_X_train.shape[1],out_X_train.shape[2]))
        out_y_train = np.memmap('tmp/y_aug_fold' + str(fold) + '.np',
                                dtype='int32',
                                mode='r',
                                shape=(naug_samp, out_y_train.shape[1]))
        out_X_test = X[test_indices]
        out_y_test = y[test_indices]
        out_lbl = out_y_train.argmax(axis=1)

        # Inner CV
        val_acc = []
        models = []
        strat_kfold = StratifiedKFold(n_splits=inner_cv_splits,
                                      shuffle=False)
        for grp_train_indices, grp_test_indices in strat_kfold.split(
                out_X_train, out_lbl):
            grp_train_indices = sample(list(grp_train_indices),
                                       len(grp_train_indices))
            in_X_train = out_X_train[grp_train_indices]
            in_y_train = out_y_train[grp_train_indices]
            grp_test_indices = sample(list(grp_test_indices), 1000)
            in_X_test = out_X_train[grp_test_indices]
            in_y_test = out_y_train[grp_test_indices]
            #print(Counter(in_y_train[:1000].argmax(axis=1))); continue

            limit_mem()
            # Generate candidate architectures
            model = modelgen.generate_models(in_X_train.shape, \
                                          number_of_classes=num_classes, \
                                          number_of_models=1, metrics=[macro_f1], model_type='CNN')

            # Compare generated architectures on a subset of data for few epochs
            outfile = os.path.join(resultdir, 'model_comparison.json')
            hist, acc, loss = find_architecture.train_models_on_samples(in_X_train, \
                                       in_y_train, in_X_test, in_y_test, model, nr_epochs=5, \
                                       subset_size=5000, verbose=True, batch_size=50, \
                                       outputfile=outfile, metric='macro_f1')
            val_acc.append(acc[0])
            models.append(model[0])

        # Choose best model and evaluate values on validation data
        print('Evaluating on best model for fold %d' % fold)
        best_model_index = np.argmax(val_acc)
        best_model, best_params, best_model_type = models[best_model_index]
        print('Best model type and parameters:')
        print(best_model_type)
        print(best_params)

        nr_epochs = 5
        ntrain = out_X_train.shape[0]
        nval = ntrain // 5
        val_idx = np.random.randint(ntrain, size=nval)
        train_idx = [
            i for i in range(out_X_train.shape[0]) if i not in val_idx
        ]
        trainX = out_X_train[train_idx]
        trainY = out_y_train[train_idx]
        valX = out_X_train[val_idx]
        valY = out_y_train[val_idx]

        limit_mem()
        best_model = modelgen.generate_CNN_model(trainX.shape, num_classes, filters=best_params['filters'], \
                                        fc_hidden_nodes=best_params['fc_hidden_nodes'], \
                                        learning_rate=best_params['learning_rate'], \
                                        regularization_rate=best_params['regularization_rate'], \
                                        metrics=[macro_f1])
        history = best_model.fit(trainX, trainY, epochs=nr_epochs, batch_size=50, \
                                 validation_data=(valX, valY))

        # Save model
        best_model.save(
            os.path.join(resultdir, 'best_model_fold' + str(fold) + '.h5'))

        # Predict probability on validation data
        probs = best_model.predict_proba(out_X_test, batch_size=1)
        y_pred = probs.argmax(axis=1)
        y_true = out_y_test.argmax(axis=1)
        predictions.append((y_true, y_pred))

    get_classification_report(predictions, sleep_states)
Example #16
                        print(" ")
                        print("Model description:")
                        model.summary()
                        print(" ")
                        print("Model type:")
                        print(model_types)
                        print(" ")

                

                modelcomparison = "modelcomparison4"
                outputfile = path_predict+modelcomparison+'.json'
                histories, val_metrics, val_losses = find_architecture.train_models_on_samples(X_train, y_train,
                                                                                           X_val, y_val,
                                                                                           models,nr_epochs=3,
                                                                                           subset_size=80000,
                                                                                           verbose=True,
                                                                                           #metric='acc',
                                                                                           outputfile=outputfile)
                print(np.asarray(val_metrics).shape)
                print(val_metrics)
                print(len(histories))
                sys.exit()


                modelcomparisons = pd.DataFrame({'model':[str(params) for model, params, model_types in models],
                                        'train_acc': [history.history['acc'][-1] for history in histories],
                                        'train_loss': [history.history['loss'][-1] for history in histories],
                                        'val_acc': [history.history['val_acc'][-1] for history in histories],
                                        'val_loss': [history.history['val_loss'][-1] for history in histories]
                                        })
Example #17
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

num_classes = Y_train.shape[1]
models = modelgen.generate_models(X_train.shape,
                                  number_of_classes=num_classes,
                                  number_of_models=1)
histories, val_accuracies, val_losses = find_architecture.train_models_on_samples(
    X_train,
    Y_train,
    X_test,
    Y_test,
    models,
    nr_epochs=1,
    subset_size=300,
    verbose=True,
    outputfile=outputfile)
print('Details of the training process were stored in ', outputfile)

modelcomparisons = pd.DataFrame({
    'model': [str(params) for model, params, model_types in models],
    'train_acc': [history.history['accuracy'][-1] for history in histories],
    'train_loss': [history.history['loss'][-1] for history in histories],
    'val_accuracy':
    [history.history['val_accuracy'][-1] for history in histories],
    'val_loss': [history.history['val_loss'][-1] for history in histories]
})
def main(argv):
    indir = argv[0]
    mode = argv[1]  # binary or multiclass
    outdir = argv[2]

    if mode == 'multiclass':
        sleep_states = [
            'Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM', 'Nonwear', 'Wake_ext'
        ]
    else:
        sleep_states = ['Wake', 'Sleep', 'Nonwear', 'Wake_ext']
        collate_sleep = ['NREM 1', 'NREM 2', 'NREM 3', 'REM']

    valid_sleep_states = [
        state for state in sleep_states if state != 'Wake_ext'
    ]
    num_classes = len(valid_sleep_states)

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    resultdir = os.path.join(outdir, mode, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    # Read data from disk
    data = pd.read_csv(os.path.join(indir, 'labels.txt'), sep='\t')
    files = []
    labels = []
    users = []
    for idx, row in data.iterrows():
        files.append(os.path.join(indir, row['filename']) + '.npy')
        labels.append(row['labels'])
        users.append(row['user'])
    if mode == 'binary':
        labels = ['Sleep' if lbl in collate_sleep else lbl for lbl in labels]

    early_stopping = EarlyStopping(monitor='val_macro_f1',
                                   mode='max',
                                   verbose=1,
                                   patience=2)

    seqlen, n_channels = np.load(files[0]).shape
    batch_size = 32

    # Use nested cross-validation based on users
    # Outer CV
    unique_users = list(set(users))
    random.shuffle(unique_users)
    out_cv_splits = 5
    in_cv_splits = 5
    out_fold_nusers = len(unique_users) // out_cv_splits
    out_n_epochs = 10
    in_n_epochs = 1
    predictions = []
    wake_idx = sleep_states.index('Wake')
    wake_ext_idx = sleep_states.index('Wake_ext')
    for out_fold in range(out_cv_splits):
        print('Evaluating fold %d' % (out_fold + 1))
        test_users = unique_users[out_fold * out_fold_nusers:(out_fold + 1) *
                                  out_fold_nusers]
        trainval_users = [
            user for user in unique_users if user not in test_users
        ]
        train_users = trainval_users[:int(0.8 * len(trainval_users))]
        val_users = trainval_users[len(train_users):]

        out_train_fnames, out_train_labels, out_train_users = get_partition(files, labels, users, train_users,\
                                                                            sleep_states, is_train=True)
        out_val_fnames, out_val_labels, out_val_users = get_partition(
            files, labels, users, val_users, sleep_states)
        out_test_fnames, out_test_labels, out_test_users = get_partition(
            files, labels, users, test_users, sleep_states)

        out_train_gen = DataGenerator(out_train_fnames, out_train_labels, valid_sleep_states, partition='out_train',\
                                        batch_size=batch_size, seqlen=seqlen, n_channels=n_channels,\
                                        n_classes=num_classes, shuffle=True, augment=True, aug_factor=0.75, balance=True)
        print(
            'Fold {}: Computing mean and standard deviation'.format(out_fold +
                                                                    1))
        mean, std = out_train_gen.fit()
        #mean = None; std = None
        out_val_gen = DataGenerator(out_val_fnames, out_val_labels, valid_sleep_states, partition='out_val',\
                                      batch_size=batch_size, seqlen=seqlen, n_channels=n_channels,\
                                      n_classes=num_classes, mean=mean, std=std)
        out_test_gen = DataGenerator(out_test_fnames, out_test_labels, valid_sleep_states, partition='out_test',\
                                       batch_size=batch_size, seqlen=seqlen, n_channels=n_channels,\
                                       n_classes=num_classes, mean=mean, std=std)

        # Get class weights
        out_class_wts = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(out_train_labels),
            y=out_train_labels)

        # Inner CV
        val_acc = []
        models = []
        in_fold_nusers = len(trainval_users) // in_cv_splits
        for in_fold in range(in_cv_splits):
            in_val_users = trainval_users[in_fold *
                                          in_fold_nusers:(in_fold + 1) *
                                          in_fold_nusers]
            in_train_users = [
                user for user in trainval_users if user not in in_val_users
            ]

            in_train_fnames, in_train_labels, in_train_users = get_partition(files, labels, users, in_train_users,\
                                                             sleep_states, is_train=True)
            in_val_fnames, in_val_labels, in_val_users = get_partition(
                files, labels, users, in_val_users, sleep_states)

            in_train_gen = DataGenerator(in_train_fnames, in_train_labels, valid_sleep_states, partition='in_train',\
                                          batch_size=batch_size, seqlen=seqlen, n_channels=n_channels,\
                                          n_classes=num_classes, shuffle=True, augment=True, aug_factor=0.75, balance=True,\
                                          mean=mean, std=std)
            in_val_gen = DataGenerator(in_val_fnames, in_val_labels, valid_sleep_states, partition='in_val',\
                                        batch_size=batch_size, seqlen=seqlen, n_channels=n_channels,\
                                        n_classes=num_classes, mean=mean, std=std)

            # Generate candidate architectures
            model = modelgen.generate_models((None, seqlen, n_channels), \
                                          number_of_classes=num_classes, \
                                          number_of_models=1, metrics=[macro_f1])#, model_type='CNN')

            # Compare generated architectures on a subset of data for few epochs
            outfile = os.path.join(resultdir, 'model_comparison.json')
            hist, acc, loss = find_architecture.train_models_on_samples(in_train_gen, in_val_gen,
                                       model, nr_epochs=in_n_epochs, n_steps=1000, class_weight=out_class_wts, \
                                       verbose=True, outputfile=outfile, metric='macro_f1')
            val_acc.append(acc[0])
            models.append(model[0])

        # Choose best model and evaluate values on validation data
        print('Evaluating on best model for fold %d' % out_fold)
        best_model_index = np.argmax(val_acc)
        best_model, best_params, best_model_type = models[best_model_index]
        print('Best model type and parameters:')
        print(best_model_type)
        print(best_params)

        if best_model_type == 'CNN':
            best_model = modelgen.generate_CNN_model((None, seqlen, n_channels), num_classes, filters=best_params['filters'], \
                                            fc_hidden_nodes=best_params['fc_hidden_nodes'], \
                                            learning_rate=best_params['learning_rate'], \
                                            regularization_rate=best_params['regularization_rate'], \
                                            metrics=[macro_f1])
        else:
            best_model = modelgen.generate_DeepConvLSTM_model((None, seqlen, n_channels), num_classes,\
                                            filters=best_params['filters'], \
                                            lstm_dims=best_params['lstm_dims'], \
                                            learning_rate=best_params['learning_rate'], \
                                            regularization_rate=best_params['regularization_rate'], \
                                            metrics=[macro_f1])

        # Use early stopping and model checkpoints to handle overfitting and save best model
        model_checkpt = ModelCheckpoint(os.path.join(resultdir,'best_model_fold'+str(out_fold+1)+'.h5'), monitor='val_macro_f1',\
                                                     mode='max', save_best_only=True)
        history = F1scoreHistory()
        hist = best_model.fit_generator(out_train_gen, epochs=out_n_epochs, \
                                 validation_data=out_val_gen, class_weight=out_class_wts,\
                                 callbacks=[early_stopping, model_checkpt])

        # Plot training history
        #    plt.Figure()
        #    plt.plot(history.mean_f1score['train'])
        #    #plt.plot(history.mean_f1score['val'])
        #    plt.title('Model F1-score')
        #    plt.ylabel('F1-score')
        #    plt.xlabel('Batch')
        #    #plt.legend(['Train', 'Test'], loc='upper left')
        #    plt.savefig(os.path.join(resultdir,'Fold'+str(fold)+'_performance_curve.jpg'))
        #    plt.clf()
        #
        ##    # Save model
        ##    best_model.save(os.path.join(resultdir,'best_model_fold'+str(fold)+'.h5'))

        # Predict probability on validation data
        probs = best_model.predict_generator(out_test_gen)
        y_pred = probs.argmax(axis=1)
        y_true = out_test_labels
        predictions.append((out_test_users, y_true, y_pred))

        # Save user report
        if mode == 'binary':
            save_user_report(
                predictions, valid_sleep_states,
                os.path.join(
                    resultdir, 'fold' + str(out_fold + 1) +
                    '_deeplearning_binary_results.csv'))
        else:
            save_user_report(
                predictions, valid_sleep_states,
                os.path.join(
                    resultdir, 'fold' + str(out_fold + 1) +
                    '_deeplearning_multiclass_results.csv'))

    get_classification_report(predictions, valid_sleep_states)

    # Save user report
    if mode == 'binary':
        save_user_report(
            predictions, valid_sleep_states,
            os.path.join(resultdir, 'deeplearning_binary_results.csv'))
    else:
        save_user_report(
            predictions, valid_sleep_states,
            os.path.join(resultdir, 'deeplearning_multiclass_results.csv'))