Example 1
def pipeline(args, grade_name, train_group, test_group):
    # Create the save path if it does not exist
    os.makedirs(args.save_path, exist_ok=True)

    # Load grades to array
    grades_train, hdr_train_grade = load_excel(args.train_path,
                                               titles=[grade_name])
    grades_test, hdr_test_grade = load_excel(args.test_path,
                                             titles=[grade_name])

    # N of subvolumes
    n_test = args.n_subvolumes_test
    n_train = args.n_subvolumes_train

    # Duplicate grades for subvolumes
    grades_train = duplicate_vector(grades_train.squeeze(),
                                    n_train,
                                    reshape=True)
    hdr_train_grade = duplicate_vector(hdr_train_grade, n_train, reshape=True)
    grades_test = duplicate_vector(grades_test.squeeze(), n_test, reshape=True)
    hdr_test_grade = duplicate_vector(hdr_test_grade, n_test, reshape=True)

    # Load features
    f_train, hdr_train_f = load_excel(args.train_f_path + grade_name + '_' +
                                      str(args.n_components) + '.xlsx')
    f_test, hdr_test_f = load_excel(args.test_f_path + grade_name + '_' +
                                    str(args.n_components) + '.xlsx')

    # PCA
    pca_train, score_train = scikit_pca(f_train.T,
                                        args.n_components,
                                        whitening=True,
                                        solver='auto')
    pca_test, score_test = scikit_pca(f_test.T,
                                      args.n_components,
                                      whitening=True,
                                      solver='auto')

    # Reshape PCA score (sample, PCA component) --> (sample, subvolume, PCA component)
    dims = score_train.shape
    dims_test = score_test.shape
    score_train = np.reshape(score_train,
                             (dims[0] // n_train, n_train, dims[1]))
    hdr_train_f = np.reshape(hdr_train_f, (dims[0] // n_train, n_train))
    score_test = np.reshape(score_test,
                            (dims_test[0] // n_test, n_test, dims_test[1]))
    hdr_test_f = np.reshape(hdr_test_f, (dims_test[0] // n_test, n_test))
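    # Note: these C-order reshapes assume the feature files are ordered sample-major,
    # i.e. the n consecutive rows of each sample correspond to its subvolumes.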

    # Linear and logistic regression
    pred_linear, weights = torch_regression(score_train,
                                            score_test,
                                            grades_train,
                                            grades_test,
                                            savepath=args.save_path +
                                            '/torch_' + grade_name + '_' +
                                            str(args.n_components))

    # Combined grades
    # grades = np.concatenate((grades_train, grades_test))
    # hdr_grades = np.concatenate((hdr_train_grade,  hdr_test_grade))
    grades = grades_train
    hdr_grades = hdr_train_grade
    pred_linear = pred_linear.T

    # ROC curves
    fpr, tpr, thresholds = roc_curve(grades > 0,
                                     np.round(pred_linear) > 0,
                                     pos_label=1)
    auc_linear = auc(fpr, tpr)

    # Spearman corr
    rho = spearmanr(grades, pred_linear)
    # Wilcoxon p
    wilc = wilcoxon(grades, pred_linear)
    # R^2 value
    r2 = r2_score(grades, pred_linear.flatten())
    # Mean squared error
    mse_linear = mean_squared_error(grades, pred_linear)
    mse_boot, l_mse, h_mse = mse_bootstrap(grades, pred_linear)

    # Stats
    print('Mean squared error, Area under curve (linear)')
    print(mse_linear, auc_linear)
    print(r'Spearman: {0}, p: {1}, Wilcoxon p: {2}, r2: {3}'.format(
        rho[0], rho[1], wilc[1], r2))

    # Scatter plot actual vs prediction
    m, b = np.polyfit(grades, pred_linear.flatten(), 1)
    fig = plt.figure(figsize=(6, 6))
    ax2 = fig.add_subplot(111)
    ax2.scatter(grades, pred_linear.flatten())
    ax2.plot(grades, m * grades + b, '-', color='r')
    ax2.set_xlabel('Actual grade')
    ax2.set_ylabel('Predicted')
    text_string = 'MSE: {0:.2f}, [{1:.2f}, {2:.2f}]\nSpearman: {3:.2f}\nWilcoxon: {4:.2f}\n$R^2$: {5:.2f}' \
        .format(mse_boot, l_mse, h_mse, rho[0], wilc[1], r2)
    ax2.text(0.05,
             0.95,
             text_string,
             transform=ax2.transAxes,
             fontsize=14,
             verticalalignment='top')
    for k in range(len(grades)):
        txt = hdr_grades[k] + str(grades[k])
        ax2.annotate(txt, xy=(grades[k], pred_linear[k]), color='r')
    plt.savefig(args.save_path + '/linear_' + grade_name + '_' +
                str(args.n_components) + '_' + args.split,
                bbox_inches='tight')
    plt.close()
    datapath = r'/media/dios/dios2/3DHistoData'
    # datapath = r'X:/3DHistoData'
    arguments = arg.return_args(datapath,
                                choice,
                                pars=arg.set_surf_loo,
                                grade_list=arg.grades_cut)
    combinator = np.mean

    arguments.binary_model = 'LOG'

    # LOGO for 2mm samples
    if choice == '2mm':
        arguments.split = 'logo'
        arguments.train_regression = True
        groups, _ = load_excel(arguments.grade_path, titles=['groups'])
        groups = groups.flatten()
    elif choice == 'Isokerays' or choice == 'Isokerays_sub':
        arguments.train_regression = False
        arguments.split = 'logo'
        #arguments.n_subvolumes = 9
        if arguments.n_subvolumes > 1:
            arguments.save_path = arguments.save_path + '_' + str(
                arguments.n_subvolumes) + 'subs'
            arguments.feature_path = arguments.save_path + '/Features'
        os.makedirs(arguments.save_path, exist_ok=True)
        os.makedirs(arguments.save_path + '/' + 'Images', exist_ok=True)
        groups, _ = load_excel(arguments.grade_path, titles=['groups'])
        groups = groups.flatten()
    else:
        os.makedirs(arguments.save_path, exist_ok=True)
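

# Standalone sketch (made-up shapes) of the duplicate-and-reshape pattern used in the
# `pipeline` function above: each grade is repeated once per subvolume (np.repeat stands
# in for duplicate_vector here) and the PCA scores are folded into a
# (sample, subvolume, component) array, assuming consecutive rows belong to the same sample.
import numpy as np

n_samples, n_subs, n_comps = 3, 2, 4
demo_grades = np.repeat(np.array([0, 1, 2]), n_subs)             # shape (6,)
demo_scores = np.arange(n_samples * n_subs * n_comps, dtype=float)
demo_scores = demo_scores.reshape(n_samples * n_subs, n_comps)   # (6, 4), sample-major
demo_scores = demo_scores.reshape(n_samples, n_subs, n_comps)    # (3, 2, 4)
print(demo_grades.shape, demo_scores.shape)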
def MRELBP(image,
           parameters,
           eps=1e-06,
           normalize=False,
           args=None,
           sample=None):
    """ Takes Median Robust Extended Local Binary Pattern from image im
    Uses n neighbours from radii r_large and r_small, r_large must be larger than r_small
    Median filter uses kernel sizes weight_center for center pixels, w_r[0] for larger radius and w_r[1]
    #or smaller radius
    Grayscale values are centered at their mean and scales with global standad deviation

    Parameters
    ----------
    image : ndarray
        Input image. Standardized to local contrast in the pipelines.
    parameters : dict
        Dictionary containing LBP parameters:
        N = Number of neighbours used in MRELBP (4 orthogonal and 4 diagonal neighbours).
        R = Distance of center pixel from neighbours used in obtaining large image.
        r = Distance of center pixel from neighbours used in obtaining small image.
        wc = Kernel size used in median filtering center image.
        wl = Kernel size used in median filtering large LBP image.
        ws = Kernel size used in median filtering small LBP image.
    eps : float
        Numerical tolerance for snapping neighbour coordinates to the integer pixel grid. Defaults to 1e-6.
    normalize : bool
        Choice whether to normalize LBP histograms by sum.
    args : Namespace
        Grading arguments; args.save_path and args.save_images are used when saving LBP images.
    sample : str
        Name of the sample used in saving images.
    Returns
    -------
    MRELBP histograms calculated with rotation invariant uniform mapping.
    Length of 32 (2 center + 10 large + 10 small + 10 radial).
    """

    n = parameters['N']
    r_large = parameters['R']
    r_small = parameters['r']
    weight_center = parameters['wc']
    weight_large = parameters['wl']
    weight_small = parameters['ws']

    # Mean grayscale value and std
    mean_image = image.mean()
    std_image = image.std()

    # Centering and scaling with std
    image_scaled = (image - mean_image) / std_image

    # Median filtering
    image_center = medfilt2d(image_scaled.copy(), weight_center)
    # Center pixels
    dist = round(r_large + (weight_large - 1) / 2)
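    # (crop margin = neighbour radius + half the large median kernel, so neighbour
    #  samples stay clear of the filter's border region)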
    image_center = image_center[dist:-dist, dist:-dist]
    # Subtracting the mean pixel value from center pixels
    image_center -= image_center.mean()
    # Binning center pixels
    center_hist = np.zeros((1, 2))
    center_hist[0, 0] = np.sum(image_center >= 0)
    center_hist[0, 1] = np.sum(image_center < 0)

    # --------------- #
    # center_hist[0,0] = np.sum(image_center>=-1e-06)
    # center_hist[0,1] = np.sum(image_center<-1e-06)
    # --------------- #

    # Median filtered images for large and small radius
    image_large = medfilt2d(image_scaled.copy(), weight_large)
    image_small = medfilt2d(image_scaled.copy(), weight_small)

    # Neighbours
    pi = np.pi
    # Empty arrays for the neighbours
    row, col = np.shape(image_center)
    n_large = np.zeros((row, col, n))
    n_small = np.zeros((row, col, n))

    for k in range(n):
        # Angle to the neighbour
        theta = k * (-1 * 2 * pi / n)
        # Large neighbourhood
        x = dist + r_large * np.cos(theta)
        y = dist + r_large * np.sin(theta)
        if abs(x - round(x)) < eps and abs(y - round(y)) < eps:
            x = int(round(x))
            y = int(round(y))
            p = image_large[y:y + row, x:x + col]
        else:
            p = image_bilinear(image_large, col, x, row, y)
        n_large[:, :, k] = p
        # Small neighbourhood
        x = dist + r_small * np.cos(theta)
        y = dist + r_small * np.sin(theta)
        if abs(x - round(x)) < eps and abs(y - round(y)) < eps:
            x = int(round(x))
            y = int(round(y))
            p = image_small[y:y + row, x:x + col]
        else:
            p = image_bilinear(image_small, col, x, row, y)
        n_small[:, :, k] = p

    # Thresholding radial neighbourhood
    n_radial = n_large - n_small

    # Subtraction of means
    mean_large = n_large.mean(axis=2)
    mean_small = n_small.mean(axis=2)
    for k in range(n):
        n_large[:, :, k] -= mean_large
        n_small[:, :, k] -= mean_small

    # Converting to binary images and taking the lbp values

    # Initialization of arrays
    lbp_large = np.zeros((row, col))
    lbp_small = np.zeros((row, col))
    lbp_radial = np.zeros((row, col))

    for k in range(n):
        lbp_large += (n_large[:, :, k] >=
                      0) * 2**k  # NOTE ACCURACY FOR THRESHOLDING!!!
        lbp_small += (n_small[:, :, k] >= 0) * 2**k
        lbp_radial += (n_radial[:, :, k] >= 0) * 2**k
        # --------------- #
        # lbp_large += (n_large[:,:,k] >= -(eps ** 2)) * 2 ** k  # NOTE ACCURACY FOR THRESHOLDING!!!
        # lbp_small += (n_small[:,:,k] >= -(eps ** 2)) * 2 ** k
        # lbp_radial += (n_radial[:,:,k] >= -(eps ** 2)) * 2 ** k
        # --------------- #

    # Calculating histograms with 2 ^ N bins
    large_hist = np.zeros((1, 2**n))
    small_hist = np.zeros((1, 2**n))
    radial_hist = np.zeros((1, 2**n))
    for k in range(2**n):
        large_hist[0, k] = np.sum(lbp_large == k)
        small_hist[0, k] = np.sum(lbp_small == k)
        radial_hist[0, k] = np.sum(lbp_radial == k)

    # Rotation invariant uniform mapping
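    # (for N = 8, the rotation invariant uniform mapping collapses the 256 codes into
    #  10 bins: 9 uniform patterns plus one bin for all non-uniform patterns)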
    mapping = get_mapping(n)
    large_hist = map_lbp(large_hist, mapping)
    small_hist = map_lbp(small_hist, mapping)
    radial_hist = map_lbp(radial_hist, mapping)

    # # Individual histogram normalization
    # if  normalize:
    #     center_hist /= np.sum(center_hist)
    #     large_hist /= np.sum(large_hist)
    #     small_hist /= np.sum(small_hist)
    #     radial_hist /= np.sum(radial_hist)

    # Concatenate histograms
    hist = np.concatenate((center_hist, large_hist, small_hist, radial_hist),
                          1)

    if normalize:
        hist /= np.sum(hist)

    if (args is not None and args.save_images and sample is not None
            and (('21_L3L' in sample) or ('20_R2M' in sample))):

        # Map LBP images
        lbp_large_mapped = map_lbp(lbp_large, mapping)
        lbp_small_mapped = map_lbp(lbp_small, mapping)
        lbp_radial_mapped = map_lbp(lbp_radial, mapping)
        lbp_list = [lbp_large_mapped, lbp_small_mapped, lbp_radial_mapped]

        # Load coefficients
        coefs, _ = load_excel(args.save_path + '/' + 'weights_surf_sub.xlsx',
                              titles=['Weights_lin', 'Weights_log'])
        thresh = 0.1
        lin = coefs[0]
        log = coefs[1]
        lin = np.abs(np.insert(lin, [2, 9, 10, 17], 0)) > thresh
        log = np.abs(np.insert(log, [2, 9, 10, 17], 0)) > thresh

        masks = [
            np.zeros(lbp_large.shape),
            np.zeros(lbp_large.shape),
            np.zeros(lbp_large.shape)
        ]

        for mask in range(len(masks)):
            for ind in range(int(np.max(lbp_large_mapped)) + 1):
                masks[mask] += (ind + 1) * (lbp_list[mask]
                                            == ind) * log[2 + mask * 10:2 +
                                                          (mask + 1) * 10][ind]

        # No instances in LBP_large (0,8) and LBP_small (0,8)
        print_images(lbp_list,
                     subtitles=['Large', 'Small', 'Radial'],
                     title=sample,
                     sample=sample + '.png')

    # Print center image
    if args is not None and sample is not None:
        fig = plt.figure(dpi=300)
        ax = fig.add_subplot(111)
        ax.imshow(image_center >= 0)
        plt.title('Center')
        plt.savefig(args.save_path + '/Images/LBP/' + sample + '_center.png',
                    transparent=True)
        plt.close()

    # Print unmapped LBP
    #print_images([lbp_large, lbp_small, lbp_radial], subtitles=['Large', 'Small', 'Radial'], title=sample,
    #             save_path=args.save_path + '/Images/LBP/', sample=sample + '.png')

    return hist
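

# Minimal usage sketch with illustrative parameter values (not the ones used in the
# pipelines); assumes this module's imports (numpy as np, scipy's medfilt2d, get_mapping,
# map_lbp, image_bilinear) are available. No LBP images are saved since args is None.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    demo_image = rng.rand(128, 128)
    demo_parameters = {'N': 8, 'R': 9, 'r': 3, 'wc': 5, 'wl': 5, 'ws': 5}
    demo_hist = MRELBP(demo_image, demo_parameters, normalize=True)
    print(demo_hist.shape)  # (1, 32): 2 center + 10 large + 10 small + 10 radial bins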
Example 4
def pipeline_hyperopt(args, files, metric, pat_groups=None):
    """Pipeline for Bayesian optimization.
    1. Loads images and ground truth.
    2. Calls the optimization function and displays result.

    Parameters
    ----------
    args : Namespace
        Namespace containing grading arguments. See grading_pipelines for detailed description.
    files : list
        List of sample datasets containing mean+std images.
    metric : function
        Loss function used for optimization.
        Defaults to sklearn.metrics.mean_squared_error.
        Other custom losses can also be used, for example 1 - Spearman correlation.
    pat_groups : ndarray
        Groups for leave-one-group-out split.
    """

    # Load images
    images_surf = []
    images_deep = []
    images_calc = []
    for k in range(len(files)):
        # Load images
        image_surf, image_deep, image_calc = load_vois_h5(
            args.image_path, files[k])

        # Automatic corner crop for deep and calcified zones
        image_deep, cropped_deep = auto_corner_crop(image_deep)
        if cropped_deep:
            print(
                'Automatically cropped sample {0}, deep zone to shape: ({1}, {2})'
                .format(files[k][:-3], image_deep.shape[0],
                        image_deep.shape[1]))
        image_calc, cropped_calc = auto_corner_crop(image_calc)
        if cropped_calc:
            print(
                'Automatically cropped sample {0}, calcified zone to shape: ({1}, {2})'
                .format(files[k][:-3], image_calc.shape[0],
                        image_calc.shape[1]))

        # Append to list
        images_surf.append(image_surf)
        images_deep.append(image_deep)
        images_calc.append(image_calc)

    # Load grades to array
    grades, hdr_grades = load_excel(args.grade_path,
                                    titles=[args.grades_used])
    grades = grades.squeeze()
    # Sort grades based on alphabetical order
    grades = np.array([
        grade
        for _, grade in sorted(zip(hdr_grades, grades), key=lambda var: var[0])
    ])
    if args.n_subvolumes > 1:
        # Extend grades variable
        grades = np.array(
            [val for val in grades for _ in range(args.n_subvolumes)])

    # Select VOI
    if args.grades_used[:4] == 'surf':
        images = images_surf[:]
    elif args.grades_used[:4] == 'deep':
        images = images_deep[:]
    elif args.grades_used[:4] == 'calc':
        images = images_calc[:]
    else:
        raise Exception('Check selected zone!')
    # Optimize parameters
    pars, error = optimization_hyperopt_loo(np.array(images),
                                            grades,
                                            args,
                                            metric,
                                            groups=pat_groups)

    print('Results for grades: ' + args.grades_used)
    print("Parameters are:\n", pars)
    for i in range(len(pars)):
        print(pars[i])
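

# Sketch of a custom loss that could be passed as `metric` to pipeline_hyperopt, along
# the lines of the "1 - Spearman correlation" option mentioned in its docstring. The
# helper name spearman_loss is illustrative and not part of the original code.
from scipy.stats import spearmanr


def spearman_loss(y_true, y_pred):
    # Perfect positive rank correlation gives 0; strong anti-correlation approaches 2.
    return 1 - spearmanr(y_true, y_pred)[0]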
def pipeline_prediction(args,
                        grade_name,
                        pat_groups=None,
                        check_samples=False,
                        combiner=np.mean):
    """Gets predictions from saved MRELBP features.

    1. Loads features and ground truth from .xlsx file

    2. Sorts samples alphabetically and removes zero features. Optional centering of features.

    3. PCA dimensionality reduction.

    4. Linear and logistic regression.

    5. Creates result plots.

    Parameters
    ----------
    args : Namespace
        All grading arguments parsed into a namespace:
        n_subvolumes = Number of subvolumes the input image is split into.
        grade_path = Path to ground truth.
        feature_path = Path to MRELBP features.
        save_path = Path to save results.
        train_regression = Choice whether to train a new model or evaluate on an existing one.
        standardization = Choice whether to center features before PCA.
        split = Cross-validation split used in training the model.
        logistic_limit = Limit used to make logistic prediction.
        convert_grades = Optional conversion (exp or log) applied to the grades before prediction.

    grade_name : str
        Title of the predicted grade (should be given on first row of the Excel file).
    pat_groups : ndarray (1-dimensional)
        Patient groups for training with a leave-one-group-out split.
    check_samples : bool
        Choice whether to print all names of ground truth and features.
        Used to make sure that features and ground truth match (debugging).
    combiner : function
        Method to combine predictions of multiple subimages. Defaults to mean of predictions.
        Other possibilities: np.max, np.median
    Returns
    -------
    Ground truth grades, logistic predictions (for ROC curves), and the confusion matrix.
    """

    # Load grades to array
    grades, hdr_grades = load_excel(args.grade_path, titles=[grade_name])

    # Sort grades based on alphabetical order
    grades = np.array([
        grade for _, grade in sorted(zip(hdr_grades, grades.squeeze()),
                                     key=lambda var: var[0])
    ])

    # Limit for logistic regression
    bound = args.logistic_limit

    # Load features from subvolumes
    if args.n_subvolumes > 1 and not args.train_regression:
        feature_list, means = [], []
        for vol in range(args.n_subvolumes):
            features, hdr_features = load_excel(args.feature_path + '/' +
                                                grade_name + '_' + str(vol) +
                                                '.xlsx')
            # Remove zero features
            features = features[~np.all(features == 0, axis=1)]
            feature_list.append(features)
            # Mean feature
            mean_sub = np.mean(features, 1)
            means.append(mean_sub)
        mean = np.mean(means, axis=0)
    # Load features without subvolumes
    else:
        features, hdr_features = load_excel(args.feature_path + '/' +
                                            grade_name + '.xlsx')
        # Remove zero features
        features = features[~np.all(features == 0, axis=1)]
        # Mean feature
        mean = np.mean(features, 1)

        if args.n_subvolumes > 1:
            # Extend grades variable
            grades = np.array(
                [val for val in grades for _ in range(args.n_subvolumes)])

        # Check matching samples
        if check_samples:
            print('Loaded grades (g) and features (f)')
            for i in range(grades.shape[0]):
                print('g, {0}, \tf {1}\t g_s {2}'.format(
                    hdr_grades[i], hdr_features[i], grades[i]))

    #
    # Train regression models
    #

    if args.train_regression:
        print('\nTraining regression model on: {0}'.format(grade_name))

        if bound != 1:
            print('Limit is set to {0}'.format(bound))
        # Define split
        if args.split == 'logo' and pat_groups is not None:
            lin_regressor = regress_logo
            log_regressor = logistic_logo
        elif args.split == 'loo' or pat_groups is None:
            lin_regressor = regress_loo
            log_regressor = logistic_loo
        else:
            raise Exception(
                'No valid cross-validation split selected (see arguments)!')

        # Standardize features
        if args.standardization == 'centering':
            features = features.T - mean
        else:
            features = standardize(features.T, axis=0)

        # PCA
        if args.use_PCA:
            pca, score = scikit_pca(features,
                                    args.n_components,
                                    whitening=True,
                                    solver='auto')
            eigenvectors = pca.components_
            singular_values = pca.singular_values_ / np.sqrt(
                features.shape[1] - 1)
        else:
            score = features
            eigenvectors = np.zeros((features.shape[1], features.shape[1]))
            singular_values = np.zeros(features.shape[1])

        # Regression
        pred_linear, weights, intercept_lin = lin_regressor(
            score,
            grades,
            groups=pat_groups,
            alpha=args.alpha,
            method=args.regression,
            convert=args.convert_grades)
        if args.binary_model == 'LOG':
            pred_logistic, weights_log, intercept_log = log_regressor(
                score, grades > bound, groups=pat_groups)
        elif args.binary_model == 'RF':
            pred_logistic, weights_log, intercept_log = rforest_logo(
                score,
                grades > bound,
                groups=pat_groups,
                #savepath=args.save_path, zone=grade_name)
                zone=grade_name)

        pca_regress_pipeline_log(features,
                                 grades,
                                 pat_groups,
                                 n_components=args.n_components,
                                 grade_name=grade_name,
                                 savepath=f'{args.save_path}/Shap_')

        # Save calculated weights
        print(f'Intercepts: {intercept_log}, {intercept_lin}')
        model_root = os.path.dirname(args.save_path)
        write_binary_weights(model_root + '/' + grade_name + '_weights.dat',
                             score.shape[1], eigenvectors, singular_values,
                             weights.flatten(), weights_log.flatten(), mean,
                             [intercept_lin, intercept_log])

        # Save the weights in excel
        writer = pd.ExcelWriter(args.save_path + '/weights_' + grade_name +
                                '.xlsx')
        list_weights = [
            weights,
            pca.inverse_transform(weights) + mean, weights_log,
            pca.inverse_transform(weights_log) + mean
        ]
        list_w_names = [
            'Weights_lin_PCA', 'Weights_lin', 'Weights_log_PCA', 'Weights_log'
        ]

        dfs = []
        for w in range(len(list_weights)):
            dfs.append(pd.DataFrame({list_w_names[w]: list_weights[w]}))
        df = pd.concat(dfs, axis=1)

        df.to_excel(writer, sheet_name='Weights')

        # Save PCA eigenvectors
        dfs = []
        for w in range(eigenvectors.shape[0]):
            dfs.append(pd.DataFrame({'PC' + str(w + 1): eigenvectors[w, :]}))
        df = pd.concat(dfs, axis=1)
        df.to_excel(writer, sheet_name='PCA eigenvectors')

        writer.save()

    #
    # Use pretrained models
    #

    else:
        print('\nEvaluating with saved model weights on: {0}\n'.format(
            grade_name))
        model_root = os.path.dirname(args.save_path)
        if args.n_subvolumes > 1:
            preds_lin, preds_log, scores = [], [], []
            for vol in range(args.n_subvolumes):
                pred_linear_sub, pred_logistic_sub, score_sub = evaluate_model(
                    feature_list[vol], args,
                    model_root + '/' + grade_name + '_weights.dat')
                preds_lin.append(pred_linear_sub)
                preds_log.append(pred_logistic_sub)
                scores.append(score_sub)

            pred_linear = combiner(np.array(preds_lin), axis=0)
            pred_logistic = combiner(np.array(preds_log), axis=0)
            score = combiner(np.array(scores), axis=0)
        else:
            pred_linear, pred_logistic, score = evaluate_model(
                features, args, model_root + '/' + grade_name + '_weights.dat')

    # Reference for pretrained PCA
    # reference_regress(features, args, score, grade_name + '_weights.dat', pred_linear, pred_logistic)

    # Logistic statistics
    auc_logistic = roc_auc_score(grades > bound, pred_logistic)
    prec, recall, _, support = precision_recall_fscore_support(
        grades > bound,
        pred_logistic > args.log_pred_threshold,
        average='binary')
    f1 = f1_score(grades > bound, pred_logistic > args.log_pred_threshold)
    accuracy = accuracy_score(grades > bound,
                              pred_logistic > args.log_pred_threshold)
    conf_matrix = confusion_matrix(grades > bound,
                                   pred_logistic > args.log_pred_threshold)

    # Spearman corr
    rho, pval = spearmanr(grades, pred_linear)
    # Wilcoxon p
    wilc = wilcoxon(grades, pred_linear)
    # R^2 value
    r2 = r2_score(grades, pred_linear.flatten())
    # Mean squared error
    mse_linear = mean_squared_error(grades, pred_linear)

    # Handle edge cases
    for p in range(len(pred_linear)):
        if pred_linear[p] < 0:
            pred_linear[p] = 0
        if pred_linear[p] > max(grades):
            pred_linear[p] = max(grades)

    # Save prediction
    try:
        stats = np.zeros(len(grades))
        stats[0] = mse_linear
        stats[2] = auc_logistic
        stats[3] = r2
        tuples = list(
            zip(hdr_grades, grades, pred_linear, abs(grades - pred_linear),
                pred_logistic, stats))
        writer = pd.ExcelWriter(args.save_path + '/prediction_' + grade_name +
                                '.xlsx')
        df1 = pd.DataFrame(tuples,
                           columns=[
                               'Sample', 'Actual grade', 'Prediction',
                               'Difference', 'Logistic prediction',
                               'MSE, auc_logistic, r^2'
                           ])
        df1.to_excel(writer, sheet_name='Prediction')
        writer.save()
    except ValueError:
        print('Could not save predictions')

    # Display results
    text_string = 'MSE: {0:.2f}\nSpearman, p: {1:.2f}, {2:.4f}\nWilcoxon sum, p: {3:.2f}, {4:.2f}\n$R^2$: {5:.2f}' \
        .format(mse_linear, rho, pval, wilc[0], wilc[1], r2)
    logistic_results = 'AUC: {0:.3f}\nPrecision: {1:.3f}\nRecall/sensitivity: {2:.3f}\nAccuracy: {3:.3f}\nF1: {4:.3f}' \
        .format(auc_logistic, prec, recall, accuracy, f1)
    print(text_string, '\n', logistic_results)
    print('Number of components: ', score.shape[1])
    save_lin = args.save_path + '/linear_' + grade_name + '_' + args.split
    # Draw linear plot
    #plot_linear(grades, pred_linear, text_string=text_string, plt_title=grade_name, savepath=save_lin)
    plot_linear(grades,
                pred_linear,
                text_string=None,
                plt_title=grade_name,
                savepath=save_lin)
    """
    # Plot PCA components
    save_pca = args.save_path + '/pca_' + grade_name + '_' + args.split
    save_pca_ani = args.save_path + '/pca_animation_' + grade_name + '_' + args.split
    if score.shape[1] == 3:
        plot_array_3d(score, savepath=save_pca, plt_title=grade_name, grades=grades)
        #plot_array_3d_animation(score, save_pca_ani, plt_title=grade_name, grades=grades)
    elif score.shape[1] == 2:
        plot_array_2d(score, savepath=save_pca, plt_title=grade_name, grades=grades)

    # Plot grade distributions
    plot_histograms(grades, plt_title=grade_name, savepath=args.save_path + '//distribution_' + grade_name)
    """
    return grades, pred_logistic, conf_matrix
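

# Standalone sketch (made-up numbers) of how the `combiner` argument of pipeline_prediction
# merges per-subvolume predictions into one prediction per sample; np.mean is the default,
# np.max and np.median are the alternatives named in the docstring.
import numpy as np

demo_preds = np.array([[0.2, 1.4, 2.9],   # predictions from subvolume 0
                       [0.4, 1.0, 3.1]])  # predictions from subvolume 1
print(np.mean(demo_preds, axis=0))  # default combiner -> [0.3 1.2 3. ]
print(np.max(demo_preds, axis=0))   # alternative      -> [0.4 1.4 3.1]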