def calc_vol_acc_Tr_Te(x_train,
                       y_train,
                       x_test,
                       y_test,
                       patients_train,
                       patients_test,
                       old_way,
                       folder=None,
                       to_categorical=True,
                       data_reduction=None):
    """
    Compute the volume accuracy of a saved model on the training and the test set.

    The dataset is formatted with format_dataset, the model is loaded from `folder`, and
    calculate_volume_accuracy is called once per set with the predicted labels, the true
    labels and the raw predictions.

    :param x_train: training samples, forwarded to format_dataset
    :param y_train: training labels, used as the true labels of the training pass
    :param x_test: test samples, forwarded to format_dataset
    :param y_test: test labels, used as the true labels of the test pass
    :param patients_train: forwarded unchanged to calculate_volume_accuracy
    :param patients_test: forwarded unchanged to calculate_volume_accuracy
    :param old_way: forwarded to format_dataset
    :param folder: location passed to load_model; None means the current folder (".")
    :param to_categorical: to_categorical flag when formatting the dataset
    :param data_reduction: if set to a number, the labels are cut to their first
                           (1/data_reduction) part; the value is also passed to format_dataset
    :return: tuple (vol_accTr, vol_accTe, num_volTr, num_volTe, num_Tr, num_Te), where num_Tr and
             num_Te are the number of (possibly reduced) training and test labels
    """
    model_location = "." if folder is None else folder

    # Only the formatted inputs are needed here; the shape and label list are discarded
    train_set, test_set, _input_shape, _labels = format_dataset(
        x_train, y_train, x_test, y_test,
        data_reduction=data_reduction,
        to_categorical=to_categorical,
        old_way=old_way,
        ret_labels=True,
        verbose=False)

    # Keep the raw labels the same length as the reduced formatted data
    if data_reduction is not None:
        y_train = y_train[:y_train.shape[0] // data_reduction]
        y_test = y_test[:y_test.shape[0] // data_reduction]

    model = load_model(model_location)

    num_Tr, num_Te = len(y_train), len(y_test)

    # Training pass first, then test pass. The 4th argument of calculate_volume_accuracy is 1
    # for the training pass and 0 for the test pass.
    outcomes = []
    for inputs, expected, pass_flag in ((train_set[0], y_train, 1),
                                        (test_set[0], y_test, 0)):
        probabilities = model.predict(inputs)
        predicted = np.argmax(probabilities, axis=1)
        accuracy, num_volumes = calculate_volume_accuracy(predicted, expected,
                                                          probabilities, pass_flag,
                                                          patients_train,
                                                          patients_test)
        outcomes.append((accuracy, num_volumes))

    (vol_accTr, num_volTr), (vol_accTe, num_volTe) = outcomes
    return vol_accTr, vol_accTe, num_volTr, num_volTe, num_Tr, num_Te
def _ask_menu_choice(max_option):
    """Prompt with ">> " until an integer between 0 and max_option (inclusive) is typed."""
    while True:
        try:
            choice = int(input(">> "))
        except ValueError:
            continue
        if 0 <= choice <= max_option:
            return choice


def _vote_patient_labels(patients, pred_labels, pred_percents, true_labels, ignore_patient):
    """
    Collapse per-slice predictions into one prediction per patient by majority vote.

    :param patients: patient identifier of every slice, parallel to pred_labels
    :param pred_labels: predicted label of every slice
    :param pred_percents: raw model output of every slice (indexable by label)
    :param true_labels: true label of every slice
    :param ignore_patient: patient that must not appear in the result
    :return: (predicted label per patient, true label per patient), two parallel lists
    """
    votes_per_patient = {}
    score_per_patient = {}
    patient_true_labels = []
    unique_patients = []
    prev_patient = ""
    for i, patient in enumerate(patients):
        label = pred_labels[i]
        votes = votes_per_patient.setdefault(patient, {})
        votes[label] = votes.get(label, 0) + 1
        # Accumulate into a fresh array so that pred_percents is never modified in place
        if patient in score_per_patient:
            score_per_patient[patient] = score_per_patient[patient] + pred_percents[i]
        else:
            score_per_patient[patient] = np.array(pred_percents[i], dtype=float)
        # A patient is registered (with the true label of its first slice) every time a new
        # run of slices of that patient starts
        if prev_patient != patient and patient != ignore_patient:
            patient_true_labels.append(true_labels[i])
            unique_patients.append(patient)
        prev_patient = patient

    patient_pred_labels = []
    for patient in unique_patients:
        votes = votes_per_patient[patient]
        top_count = max(votes.values())
        tied = sorted(label for label, count in votes.items() if count == top_count)
        if len(tied) == 1:
            patient_pred_labels.append(tied[0])
        else:
            # Same number of votes: choose the tied label with the highest summed score
            scores = score_per_patient[patient]
            best = int(np.argmax([scores[label] for label in tied]))
            patient_pred_labels.append(tied[best])
    return patient_pred_labels, patient_true_labels


def observe_results(data_generator,
                    folder=None,
                    to_categorical=True,
                    data_reduction=None,
                    mode=0,
                    observe_training=0,
                    filename=None,
                    num_columns=None,
                    misclassified_wizard=True,
                    custom_observation=None,
                    old_way=False):
    """
    Load a saved model, print its accuracy, draw its confusion matrix and optionally browse
    the misclassified examples interactively.

    :param data_generator: where to get the data from (keras.datasets.mnist.load_data, cifar...).
                           Either a callable returning ((x_train, y_train), (x_test, y_test)) or
                           that structure itself
    :param folder: name of folder where results are found (None means ".")
    :param to_categorical: to_categorical flag when formatting the dataset
    :param data_reduction: if set to a number, use only (1/data_reduction) of all data. None uses all the data
    :param mode: plotting mode, selects the arguments passed to plot_confusion_matrix:
                 0 (or any other value): ignore_diagonal=True, max_scale_factor=1.0
                 1: ignore_diagonal=False, max_scale_factor=100.0, color_by_row=True
                 2: like 0 but with plot_half=True
                 3: like 1 but with plot_half=True
    :param observe_training: 1, we observe results for training set, 2 for both training and
                             test sets, any other value (default 0) for test set
    :param filename: forwarded to plot_confusion_matrix
    :param num_columns: forwarded to plot_all_images as max_cols
    :param misclassified_wizard: if True, shows the wizard to see mistakes, else skips this
    :param custom_observation: made specially to look at patients in radiomics dataset, look at
                               full patient instead of only slices. The value is passed to
                               apply_custom_observation, and the slice predictions are replaced
                               by one majority-vote prediction per patient
    :param old_way: forwarded to format_dataset
    :return: None
    """
    if folder is None:
        folder = "."

    print("Loading training and test sets ...")
    # Checking callable() instead of catching TypeError keeps errors raised inside the
    # generator itself visible
    if callable(data_generator):
        (x_train, y_train), (x_test, y_test) = data_generator()
    else:
        (x_train, y_train), (x_test, y_test) = data_generator

    print("Reshaping training and test sets ...")
    train_set, test_set, input_shape, labels = format_dataset(
        x_train,
        y_train,
        x_test,
        y_test,
        verbose=True,
        ret_labels=True,
        data_reduction=data_reduction,
        to_categorical=to_categorical,
        old_way=old_way)
    if data_reduction is not None:
        x_test = x_test[:x_test.shape[0] // data_reduction]
        y_test = y_test[:y_test.shape[0] // data_reduction]
        x_train = x_train[:x_train.shape[0] // data_reduction]
        y_train = y_train[:y_train.shape[0] // data_reduction]
        # NOTE(review): format_dataset already received data_reduction; if it reduces its
        # outputs itself, the two lines below reduce them a second time -- confirm.
        train_set[0] = train_set[0][:train_set[0].shape[0] // data_reduction]
        test_set[0] = test_set[0][:test_set[0].shape[0] // data_reduction]

    print("Loading model from {} ...".format(folder))
    model = load_model(folder)

    print("Calculating predicted labels ...")
    if observe_training == 1:
        pred_percents = model.predict(train_set[0])
        true_labels = y_train
        examples_set = x_train
        confusion_title = "Confusion Matrix (Training Set)"
    elif observe_training == 2:
        pred_percents = model.predict(
            np.concatenate((train_set[0], test_set[0])))
        true_labels = np.concatenate((y_train, y_test))
        examples_set = np.concatenate((x_train, x_test))
        confusion_title = "Confusion Matrix (Training & Test Set)"
    else:
        pred_percents = model.predict(test_set[0])
        true_labels = y_test
        examples_set = x_test
        confusion_title = "Confusion Matrix (Test Set)"
    pred_labels = np.argmax(pred_percents, axis=1)

    if custom_observation is not None:
        # Count mismatching slices (valid for any number of labels, not only 0 and 1)
        num_errors = sum(1 for p, t in zip(pred_labels, true_labels) if p != t)
        print("Slices Results: {} errors from {} slices (Accuracy: {})".format(
            num_errors, len(pred_labels), 1 - num_errors / len(pred_labels)))
        confusion_title += " - Custom ({})".format(custom_observation)
        patients_train, patients_test = apply_custom_observation(
            custom_observation)
        # Ignore patients that have half the 3D image in test and other half in training
        ignore_patient = ""
        if observe_training == 1:
            patients = patients_train
            ignore_patient = patients_test[0]
        elif observe_training == 2:
            # assumes both are lists, so "+" concatenates them -- TODO confirm
            patients = patients_train + patients_test
        else:
            patients = patients_test
            ignore_patient = patients_train[-1]

        patient_pred_labels, patient_true_labels = _vote_patient_labels(
            patients, pred_labels, pred_percents, true_labels, ignore_patient)
        print("Predictions:", patient_pred_labels)
        print("True labels:", patient_true_labels)
        true_labels = np.array(patient_true_labels)
        pred_labels = np.array(patient_pred_labels)

    errors_vector = (pred_labels != true_labels)
    num_errors = np.sum(errors_vector)
    size_set = pred_labels.size
    print("Results: {} errors from {} examples (Accuracy: {})".format(
        num_errors, size_set, 1 - num_errors / size_set))

    print("Drawing confusion matrix ...")
    ignore_diag = True
    max_scale_factor = 1.0
    color_by_row = False
    half_matrix = False
    if mode == 1 or mode == 3:
        ignore_diag = False
        max_scale_factor = 100.0
        color_by_row = True
        if mode == 3:
            half_matrix = True
    elif mode == 2:
        half_matrix = True
    confusion_mat = plot_confusion_matrix(true_labels,
                                          pred_labels,
                                          labels,
                                          title=confusion_title,
                                          plot_half=half_matrix,
                                          filename=filename,
                                          max_scale_factor=max_scale_factor,
                                          ignore_diagonal=ignore_diag,
                                          color_by_row=color_by_row)

    print("Counting misclassified examples ...")
    errors_indices = np.argwhere(errors_vector)
    errors_by_predicted_label = {label: [] for label in labels}
    errors_by_expected_label = {label: [] for label in labels}

    for idx in errors_indices:
        position = idx[0]
        errors_by_expected_label[true_labels[position]].append(position)
        errors_by_predicted_label[pred_labels[position]].append(position)

    row_format = "    Label {}: {:>3} mistakes, {:>5} right answers => Accuracy: {}"
    for heading, errors_by_label in (
            ("Labels that were confused by another value:", errors_by_expected_label),
            ("Labels that were mistakenly chosen:", errors_by_predicted_label)):
        print(heading)
        for i, label in enumerate(labels):
            right = confusion_mat[i][i]
            mistakes = len(errors_by_label[label])
            total = right + mistakes
            # A label that never shows up has no defined accuracy
            accuracy = right / total if total else float("nan")
            print(row_format.format(label, mistakes, right, accuracy))

    if not misclassified_wizard:
        return

    while True:
        print("Welcome to the misclassified images viewer!")
        print("Use the number keys + ENTER to select the best option.")
        print("Do you want to filter by predicted value or true value?")
        print(
            "0. Exit\n1. Filter by predicted values\n2. Filter by true values")
        # Only the options 0, 1 and 2 exist in this menu
        num = _ask_menu_choice(2)
        if num == 0:
            break
        pred_notrue = num == 1
        print("Filtering by: {} Values\n".format(
            "Predicted" if pred_notrue else "True"))
        errors_by_label = (errors_by_predicted_label
                           if pred_notrue else errors_by_expected_label)
        while True:
            print("Select the label you want to filter.")
            print("{:>2}. Back".format(0))
            for i, key in enumerate(labels):
                print("{:>2}. Label {}  ({} mistakes)".format(
                    i + 1, key, len(errors_by_label[key])))
            num = _ask_menu_choice(len(labels))
            if num == 0:
                break
            selected_label = labels[num - 1]
            print(
                "Plotting misclassified examples for the {} label {}\n".format(
                    "predicted" if pred_notrue else "true", selected_label))

            indices = np.array(errors_by_label[selected_label], dtype=int)
            # The images are titled with (and sorted by) the "other" label of every mistake:
            # the true label when filtering by prediction, the predicted one otherwise
            if pred_notrue:
                other_source = true_labels
                title = "Predicted label: {}".format(selected_label)
            else:
                other_source = pred_labels
                title = "True label: {}".format(selected_label)
            indices = indices[other_source[indices].argsort()]
            title_labels = other_source[indices]
            plot_all_images(examples_set[indices],
                            labels=title_labels,
                            labels2=None,
                            fig_num=1,
                            suptitle=title,
                            max_cols=num_columns)

    txt = input(
        "Press ENTER to see all the misclassified examples unsorted one by one, or q to exit. "
    )
    if len(txt) <= 0 or txt[0] != "q":
        # Plot test examples, and see label comparison
        show_errors_only = True
        print("Plotting {}test images ...".format(
            "incorrectly classified " if show_errors_only else ""))
        plot_images(examples_set,
                    labels=true_labels,
                    labels2=pred_labels,
                    label2_description="Predicted label",
                    show_errors_only=show_errors_only,
                    fig_num=1)
def experiments_runner(data_generator, experiment_obj, folder=None, data_reduction=None, epochs=100,
                       batch_size=32, early_stopping=10, to_categorical=True, verbose=False):
    """
    Run all the experiments of an experiment object and save every result in a folder.

    Loads the data from data_generator, creates the experiment object (containing all the
    experiments that have to be run), creates/opens a folder where it will save all the data,
    and runs all experiments, appending every result to 'results.yaml' inside that folder. If the
    folder already exists and contains old results, the experiments already performed will not be
    run again. This allows us to stop the execution, and start it again where we left off.

    The working directory is changed to `folder` while the experiments run, and is restored to
    its original value when the function ends, even if an experiment raises.

    :param data_generator: either a callable returning ((x_train, y_train), (x_test, y_test)) or
                           that structure itself
    :param experiment_obj: called without arguments to create the experiment; the result must
                           offer get_experiments, get_printable_experiment and run_experiment
    :param folder: folder where results are saved; None creates a name from the current date/time
    :param data_reduction: forwarded to format_dataset
    :param epochs: forwarded to flexible_neural_net
    :param batch_size: forwarded to flexible_neural_net
    :param early_stopping: forwarded to flexible_neural_net
    :param to_categorical: to_categorical flag when formatting the dataset
    :param verbose: forwarded to flexible_neural_net
    :return: the folder used to save the results
    """
    print("Loading training and test sets ...")
    # Checking callable() instead of catching TypeError keeps errors raised inside the
    # generator itself visible
    if callable(data_generator):
        (x_train, y_train), (x_test, y_test) = data_generator()
    else:
        (x_train, y_train), (x_test, y_test) = data_generator

    print("Reshaping training and test sets ...")
    train_set, test_set, input_shape, labels = format_dataset(x_train, y_train, x_test, y_test,
                                                              data_reduction=data_reduction,
                                                              verbose=True, ret_labels=True,
                                                              to_categorical=to_categorical,
                                                              old_way=False)

    # create folder and cd into it
    if folder is None:
        now = datetime.now()
        folder = "{}_{:02d}.{:02d}.{:02d}".format(now.date(), now.hour, now.minute, now.second)
    original_cwd = os.getcwd()
    os.makedirs(folder, exist_ok=True)  # an already existing dir is fine, other errors are not
    os.chdir(folder)

    try:
        # load data in old results.yaml, if it exists
        old_data = None
        try:
            with open("results.yaml") as f:
                try:
                    # NOTE(review): yaml.Loader can build arbitrary Python objects. It is kept
                    # because results.yaml is written by this same function; switch to
                    # yaml.safe_load if the saved params are always plain YAML types.
                    old_data = yaml.load(f, Loader=yaml.Loader)
                    print("'results.yaml' was parsed successfully. The experiments that appear in " +
                          "'results.yaml' will not be executed again.")
                except yaml.YAMLError as YamlError:
                    print("There was an error parsing 'results.yaml'. File ignored.")
                    print(YamlError)
                    old_data = None
        except FileNotFoundError:
            old_data = None

        # train model and save data
        print("Generating and training models...")
        experiment = experiment_obj()
        iterator, num_iterations = experiment.get_experiments()
        avg_time = 0
        num_skips = 0
        # None on the first run; afterwards the value returned by flexible_neural_net is passed
        # back in (presumably so that the weights of every layer are reset for every experiment)
        init_weights = None
        for it, params_comb in enumerate(iterator):
            # NOTE(review): `clock` comes from the file's imports; if it is time.clock, it no
            # longer exists in Python >= 3.8 and time.perf_counter should be used instead.
            t = clock()
            print("\niteration: {}/{}".format(it + 1, num_iterations))
            params = experiment.get_printable_experiment(params_comb, it + 1, verbose=True)

            # skip experiments that are already found in old results.yaml
            skip_test = params_in_data(params, old_data)
            if skip_test is not False:
                print("The folder {} already contains this model (found in {}). "
                      "Model calculation skipped.".format(folder, skip_test))
                num_skips += 1
                continue

            # run experiments (returns same as flexible_neural_network)
            optimizer, loss, *layers = experiment.run_experiment(input_shape, labels, params_comb)
            parameters = flexible_neural_net(train_set, test_set, optimizer, loss, *layers,
                                             batch_size=batch_size, epochs=epochs,
                                             early_stopping=early_stopping, verbose=verbose,
                                             initial_weights=init_weights)
            [lTr, aTr], [lTe, aTe], fit_time, location, n_epochs, init_weights = parameters

            # originally run_experiment would call flexible_neural_net, or any other experiment
            # function. It was more flexible, but more complicated for the user. To get the old
            # model back, go to commit 8224aa8f89085f5fbab5a86f4529982fbf47b8f8 in
            # github.com/decordoba/deep-learning-with-Keras

            result = {"lossTr": float(lTr), "accTr": float(aTr),
                      "lossTe": float(lTe), "accTe": float(aTe),
                      "time": float(fit_time), "location": location,
                      "number_epochs": n_epochs}

            # save results to result.yaml
            with open("results.yaml", "a") as f:
                f.write(yaml.dump_all([{location: {"params": params,
                                                   "result": result}}],
                                      default_flow_style=False,
                                      explicit_start=False))

            # print data to monitor how well we are doing
            taken = clock() - t
            # running mean over the experiments that were really executed (skips excluded)
            avg_time = (avg_time * (it - num_skips) + taken) / (it - num_skips + 1)
            print("\nResults:  Training:  Acc: {:<10}  Loss: {}".format(round(aTr, 8),
                                                                        round(lTr, 8)))
            print("          Test:      Acc: {:<10}  Loss: {}".format(round(aTe, 8),
                                                                      round(lTe, 8)))
            print("          Number of Epochs:   {}".format(n_epochs))
            print("          Time taken:         {}  (fit & evaluation time: {})"
                  "".format(timedelta(seconds=taken), timedelta(seconds=fit_time)))
            print("          Expected time left: {}  (mean time: {})"
                  "".format(timedelta(seconds=avg_time * (num_iterations - it - 1)),
                            timedelta(seconds=avg_time)))
    finally:
        # Go back to where we started, also for nested/absolute folders and after errors
        os.chdir(original_cwd)
    return folder