def get_model_differences(args, data_handler, biomarker, offsets):
    print log.INFO, 'Comparing models for {0}...'.format(biomarker)

    model_file = data_handler.get_model_file(biomarker)
    print model_file
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return
    donohue_model_file = os.path.join(data_handler._conf.models_folder,
                                      'denohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.')))
    if not os.path.isfile(donohue_model_file):
        print log.ERROR, 'Donohue Model file not found: {0}'.format(model_file)
        return

    # Read Donohue model
    r = mlab.csv2rec(donohue_model_file)
    progrs = r[r.dtype.names[0]] * 30.44
    progrs = progrs[::100]
    vals_donohue = r[r.dtype.names[1]]
    vals_donohue = vals_donohue[::100]

    # Read my model
    pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator)
    diffs = np.empty(len(offsets))
    for i, offset in enumerate(offsets):
        vals_mine = pm.get_quantile_curve(progrs + offset, 0.5)
        normalizer = max(np.max(vals_mine), np.max(vals_donohue))
        diffs[i] = np.mean(np.abs(vals_donohue - vals_mine)) /normalizer

    return diffs
예제 #2
0
def get_model_differences(args, data_handler, biomarker, offsets):
    print log.INFO, 'Comparing models for {0}...'.format(biomarker)

    model_file = data_handler.get_model_file(biomarker)
    print model_file
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return
    donohue_model_file = os.path.join(
        data_handler._conf.models_folder, 'denohue',
        'population_{0}.csv'.format(biomarker.replace(' ', '.')))
    if not os.path.isfile(donohue_model_file):
        print log.ERROR, 'Donohue Model file not found: {0}'.format(model_file)
        return

    # Read Donohue model
    r = mlab.csv2rec(donohue_model_file)
    progrs = r[r.dtype.names[0]] * 30.44
    progrs = progrs[::100]
    vals_donohue = r[r.dtype.names[1]]
    vals_donohue = vals_donohue[::100]

    # Read my model
    pm = ProgressionModel(biomarker,
                          model_file,
                          extrapolator=args.extrapolator)
    diffs = np.empty(len(offsets))
    for i, offset in enumerate(offsets):
        vals_mine = pm.get_quantile_curve(progrs + offset, 0.5)
        normalizer = max(np.max(vals_mine), np.max(vals_donohue))
        diffs[i] = np.mean(np.abs(vals_donohue - vals_mine)) / normalizer

    return diffs
def evaluate_synth_model(model_file, biomarker, progress_linspace, number_of_value_steps, metric='area'):
    """
    Compute the mean error between a fitted model and the synthetic reference model

    :param model_file: the file the fitted progression model is read from
    :param biomarker: the synthetic biomarker to evaluate
    :param progress_linspace: the (start, stop, num) arguments for the progress steps
    :param number_of_value_steps: the number of value steps between the extrema
                                  of the 1% and 99% quantile curves
    :param metric: one of 'area', 'peakdist' or 'maxdist'
    :return: the mean error; 0 if the metric is unknown (an error is printed)
    """
    # Define progress steps
    pm = ProgressionModel(biomarker, model_file)
    progresses = np.linspace(progress_linspace[0],
                             progress_linspace[1],
                             progress_linspace[2])

    # Define value steps, spanning the range covered by the 1% and 99% quantile curves
    min_val = float('inf')
    max_val = float('-inf')
    for quantile in [0.01, 0.99]:
        curve = pm.get_quantile_curve(progresses, quantile)
        min_val = min(min_val, np.min(curve))
        max_val = max(max_val, np.max(curve))
    values = np.linspace(min_val, max_val, number_of_value_steps)

    # Get mean error
    error = 0
    if metric == 'area':
        # Absolute difference of the two densities, summed over the value grid
        for progr in progresses:
            probs_model = [SynthModel.get_probability(biomarker, progr, v) for v in values]
            probs_fit = pm.get_density_distribution(values, progr)
            error += np.sum(np.abs(np.array(probs_fit) - np.array(probs_model)))
        error *= (values[1] - values[0]) / len(progresses)
    elif metric == 'peakdist':
        # Distance between the values with the highest density, per progress
        for progr in progresses:
            probs_model = [SynthModel.get_probability(biomarker, progr, v) for v in values]
            probs_fit = pm.get_density_distribution(values, progr)
            peak_model = values[np.argsort(probs_model)[-1]]
            peak_fit = values[np.argsort(probs_fit)[-1]]
            error += np.abs(peak_fit - peak_model)
        error /= len(progresses)
    elif metric == 'maxdist':
        # Distance between the progresses with the highest density, per value
        for value in values:
            probs_model = [SynthModel.get_probability(biomarker, p, value) for p in progresses]
            # Each call is made with a one-element value list; flatten the results
            # to one entry per progress. Without this, argsort runs along the
            # trailing axis of length one and always selects progresses[0].
            probs_fit = np.ravel([pm.get_density_distribution([value], p) for p in progresses])
            max_model = progresses[np.argsort(probs_model)[-1]]
            max_fit = progresses[np.argsort(probs_fit)[-1]]
            error += np.abs(max_fit - max_model)
        error /= len(values)
    else:
        print log.ERROR, 'Metric unknown: {0}'.format(metric)

    return error
def evaluate_biomarker_disc(args, data_handler, biomarker):
    model_file = data_handler.get_model_file(biomarker)
    eval_file = model_file.replace('.csv', '_eval_{0}.csv'.format(args.metric))

    if os.path.isfile(eval_file):
        print log.SKIP, 'Evaluation file already existing: {0}'.format(eval_file)
    elif not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}!'.format(model_file)
    else:
        model = ProgressionModel(biomarker, model_file)
        fitter = ModelFitter(model)

        # Determine value and progress interval
        min_value, max_value = model.get_value_range(quantiles=args.quantiles)
        values = np.linspace(min_value, max_value, args.value_samples)
        progresses = np.linspace(model.min_progress, model.max_progress, args.progress_samples)
        print log.INFO, 'Evaluating {0} steps in value interval [{1}, {2}]...'.format(args.value_samples, min_value, max_value)
        print log.INFO, 'Evaluating {0} steps in progress interval [{1}, {2}]...'.format(args.progress_samples, model.min_progress, model.max_progress)
        value_step = values[1] - values[0]

        # Compute error
        writer = csv.writer(open(eval_file, 'wb'), delimiter=',')
        writer.writerow(['progress', 'error'])

        total_error = 0
        for progress in progresses:
            sample_error = 0
            for value in values:
                prob_value = model.get_probability_value(value, progress)
                samples = {'bl': {'scantime': 0, biomarker: value}}
                estimated_dpi = fitter.get_dpi_for_samples(samples, phase=args.args)
                sample_error += prob_value * np.square(progress - estimated_dpi)
            sample_error = math.sqrt(value_step * sample_error / len(values))
            total_error += sample_error

            writer.writerow([progress, sample_error])
            print log.RESULT, 'Error for progress {0}: {1}'.format(progress, sample_error)

        total_error /= len(progresses)
        print log.RESULT, 'Total error: {0}'.format(total_error)
예제 #5
0
def evaluate_synth_model(model_file,
                         biomarker,
                         progress_linspace,
                         number_of_value_steps,
                         metric='area'):
    """
    Compute the mean error between a fitted model and the synthetic reference model

    :param model_file: the file the fitted progression model is read from
    :param biomarker: the synthetic biomarker to evaluate
    :param progress_linspace: the (start, stop, num) arguments for the progress steps
    :param number_of_value_steps: the number of value steps between the extrema
                                  of the 1% and 99% quantile curves
    :param metric: one of 'area', 'peakdist' or 'maxdist'
    :return: the mean error; 0 if the metric is unknown (an error is printed)
    """
    # Define progress steps
    pm = ProgressionModel(biomarker, model_file)
    progresses = np.linspace(progress_linspace[0], progress_linspace[1],
                             progress_linspace[2])

    # Define value steps, spanning the range covered by the 1% and 99% quantile curves
    min_val = float('inf')
    max_val = float('-inf')
    for quantile in [0.01, 0.99]:
        curve = pm.get_quantile_curve(progresses, quantile)
        min_val = min(min_val, np.min(curve))
        max_val = max(max_val, np.max(curve))
    values = np.linspace(min_val, max_val, number_of_value_steps)

    # Get mean error
    error = 0
    if metric == 'area':
        # Absolute difference of the two densities, summed over the value grid
        for progr in progresses:
            probs_model = [
                SynthModel.get_probability(biomarker, progr, v) for v in values
            ]
            probs_fit = pm.get_density_distribution(values, progr)
            error += np.sum(
                np.abs(np.array(probs_fit) - np.array(probs_model)))
        error *= (values[1] - values[0]) / len(progresses)
    elif metric == 'peakdist':
        # Distance between the values with the highest density, per progress
        for progr in progresses:
            probs_model = [
                SynthModel.get_probability(biomarker, progr, v) for v in values
            ]
            probs_fit = pm.get_density_distribution(values, progr)
            peak_model = values[np.argsort(probs_model)[-1]]
            peak_fit = values[np.argsort(probs_fit)[-1]]
            error += np.abs(peak_fit - peak_model)
        error /= len(progresses)
    elif metric == 'maxdist':
        # Distance between the progresses with the highest density, per value
        for value in values:
            probs_model = [
                SynthModel.get_probability(biomarker, p, value)
                for p in progresses
            ]
            # Each call is made with a one-element value list; flatten the results
            # to one entry per progress. Without this, argsort runs along the
            # trailing axis of length one and always selects progresses[0].
            probs_fit = np.ravel([
                pm.get_density_distribution([value], p) for p in progresses
            ])
            max_model = progresses[np.argsort(probs_model)[-1]]
            max_fit = progresses[np.argsort(probs_fit)[-1]]
            error += np.abs(max_fit - max_model)
        error /= len(values)
    else:
        print log.ERROR, 'Metric unknown: {0}'.format(metric)

    return error
def evaluate_biomarker_cover(args, data_handler, biomarker):
    model_file = data_handler.get_model_file(biomarker)
    eval_file = model_file.replace('.csv', '_eval_{0}.csv'.format(args.metric))

    if os.path.isfile(eval_file) and not args.recompute_metric:
        print log.SKIP, 'Evaluation file already existing: {0}'.format(eval_file)
    elif not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}!'.format(model_file)
    else:
        model = ProgressionModel(biomarker, model_file)

        # Determine value and progress interval
        progresses = np.linspace(model.min_progress, model.max_progress, args.progress_samples)
        median_curve = model.get_quantile_curve(progresses, 0.5)
        min_value = np.min(median_curve)
        max_value = np.max(median_curve)

        print log.INFO, 'Evaluating {0} steps in progress interval [{1}, {2}] for values in [{3}, {4}]...'.format(
            args.progress_samples, progresses[0], progresses[-1], min_value, max_value)

        # Compute error
        writer = csv.writer(open(eval_file, 'wb'), delimiter=',')
        writer.writerow(['progress', 'error'])

        # Compute error
        total_error = 0
        for progress in progresses:
            min_q = model.approximate_quantile(progress, min_value)
            max_q = model.approximate_quantile(progress, max_value)
            quantile_range = max_q - min_q
            total_error += quantile_range

            writer.writerow([progress, quantile_range])

        total_error /= len(progresses)
        print log.RESULT, 'Total error {0}: {1}'.format(biomarker, total_error)
def evaluate_experiment(args, biomarker, sampling, viscodes=[0]):
    print log.INFO, 'Evaluating {0} model with {1} samples...'.format(biomarker, args.number_of_training_samples)

    num_visits = max(viscodes) + 1
    errors_experiment = []
    for run in xrange(args.number_of_runs):
        data_handler = SynthDataHandler()
        model_file = data_handler.get_model_file(biomarker, num_samples=args.number_of_training_samples,
                                                 sampling=sampling, run=run)
        error_folder = SynthDataHandler.make_dir(data_handler.get_eval_folder(), biomarker)
        error_file = os.path.join(error_folder, os.path.basename(model_file).replace('.csv', '_test.p'))
        if num_visits > 1:
            error_file = error_file.replace('_test.p', 'v{0}_test.p'.format(num_visits))

        if os.path.isfile(error_file) and not args.recompute_errors:
            print log.SKIP, 'Skipping error computation for {0} samples {1}, run {2}'.format(
                args.number_of_training_samples, sampling, run)
            errors_run = pickle.load(open(error_file, 'rb'))
        else:
            # Generate model
            st.generate_synth_model(biomarker,
                                    recompute_models=args.recompute_models,
                                    num_samples=args.number_of_training_samples,
                                    sampling=sampling, run=run)

            # Initialise fitter
            fitter = ModelFitter(ProgressionModel(biomarker, model_file))

            # Generate test data
            test_data = st.generate_synth_test_data([biomarker],
                                                    args.number_of_test_samples,
                                                    num_visits, run,
                                                    recompute_test_data=args.recompute_test_data)
            errors_run = st.evaluate_synth_fitting(fitter,
                                                   test_data,
                                                   [biomarker],
                                                   viscodes)
            pickle.dump(errors_run, open(error_file, 'wb'))
        errors_experiment.append(np.mean(errors_run))

    return errors_experiment
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr):
    """
    Plot the model of one biomarker with the fitted values

    :param data_handler: the data handler
    :param biomarker: the biomarker to plot
    :param measurements: the measurements containing the biomarker samples of one subject;
                         the visits are read from measurements[0], each visit holding
                         a 'scantime' and the biomarker values
    :param dpi: the estimated DPI
    :param dpr: the estimated DPR
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)

    #
    # Read model
    #
    pm = ProgressionModel(biomarker, model_file)
    # Extend the plotted range by 30% of the model range on both sides
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate,
                                        pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress,
                                        max_progress_extrapolate, 20)

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    figure_width = 6
    fig = plt.figure(figsize=(figure_width, 5))
    ax1 = plt.subplot(1, 1, 1)
    pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    ax1.set_title(
        'Model for {0} with fitted sample values'.format(biomarker_string))
    ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
    ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot the percentile curves of the fitted model
    #
    ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
    ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

    quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
    grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
    for grey_value, quantile in zip(grey_values, quantiles):
        # Solid line inside the model range
        curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
        ax1.plot(progress_linspace_int, curve_int, color=grey_value)

        # Dashed lines for the extrapolated parts on both sides
        curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile)
        curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile)
        ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value)
        ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value)

        label = 'q = {0}'.format(quantile * 100)
        ax1.text(progress_linspace_int[-1] + 100,
                 curve_int[-1],
                 label,
                 fontsize=10)

    #
    # Plot points
    #
    progr_points = []
    value_points = []
    subject_visits = measurements[0]
    for visit in subject_visits:
        visit_samples = subject_visits[visit]
        if biomarker in visit_samples:
            # Map the scan time onto the progress axis with the fitted DPI and DPR
            progress = visit_samples['scantime'] * dpr + dpi
            value = visit_samples[biomarker]
            progr_points.append(progress)
            value_points.append(value)
            ax1.axvline(progress, color='b', linestyle='--')
            ax1.text(progress + 150, value, visit, color='b', fontsize=10)

    ax1.scatter(progr_points,
                value_points,
                s=25.0,
                color='b',
                edgecolor='none',
                vmin=0.0,
                vmax=1.0,
                alpha=0.9)

    #
    # Draw the plot
    #
    plt.tight_layout()
    plt.show()
    plt.close(fig)
예제 #9
0
def evaluate_curves(biomarker_name, num_samples=200, show_plots=False, csig=0):
    biomarker = 'synth_{0}'.format(biomarker_name)
    print log.INFO, 'Evaluating {0} for {1} samples, csig={2}...'.format(biomarker, num_samples, csig)
    donohue_model_path = '/Development/DiseaseProgressionModel/models/donohue/'
    vgam_model_path = '/Development/DiseaseProgressionModel/models/synth/'

    # Setup plot
    if show_plots:
        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)
        pt.setup_axes(plt, ax, xgrid=False, ygrid=False)
        ax.set_title('')
        ax.set_xlabel('')
    else:
        fig = None
        ax = None

    # Initialise values
    offset_donohue = 0  # 182.5
    errors_donohue = []
    errors_vgam_mean = []
    errors_vgam_median = []

    # Get real curve values
    progress_linspace = np.linspace(-1500, 1500)
    mean_curve = [SynthModel.get_mean_value(biomarker, p) for p in progress_linspace]
    median_curve = [SynthModel.get_distributed_value(biomarker, p, cdf=0.5) for p in progress_linspace]

    # Plot synthetic model curve
    if show_plots:
        progress_linspace_synth = np.linspace(-2500, 2500, 100)
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        alphas = [0.4, 0.7, 1.0, 0.7, 0.4]
        for quantile, alpha in zip(quantiles, alphas):
            curve_synth = [SynthModel.get_distributed_value(biomarker, p, cdf=quantile)
                           for p in progress_linspace_synth]
            ax.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha)
        curve_synth = [SynthModel.get_mean_value(biomarker, p) for p in progress_linspace_synth]
        ax.plot(progress_linspace_synth, curve_synth, '--', color='b')

    # Get values for mean calculation
    end_values = [SynthModel.get_distributed_value(biomarker, progress_linspace[0], cdf=0.001),
                  SynthModel.get_distributed_value(biomarker, progress_linspace[-1], cdf=0.001),
                  SynthModel.get_distributed_value(biomarker, progress_linspace[0], cdf=0.999),
                  SynthModel.get_distributed_value(biomarker, progress_linspace[-1], cdf=0.999)]
    values = np.linspace(min(end_values), max(end_values), 100)
    for run in range(100):
        # Get Donohue model
        donohue_file = os.path.join(donohue_model_path,
                                    'population_value-{0}_csig{1}_run{2}.csv'.format(biomarker_name, csig, run))

        r = mlab.csv2rec(donohue_file)
        progrs = r[r.dtype.names[0]] - offset_donohue
        vals = r[r.dtype.names[1]]

        curve_donohue = []
        progr_donohue = []
        for p in progress_linspace:
            if progrs[0] < p < progrs[-1]:
                i = 1
                while p > progrs[i]:
                    i += 1
                progr_donohue.append(float(progrs[i]))
                curve_donohue.append(float(vals[i]))
            else:
                print log.WARNING, 'Model scope too small... skipping!'
                continue

        # Get VGAM model
        if csig == 0:
            vgam_model_file = os.path.join(vgam_model_path,
                                           '{0}_model_{1}_longitudinal_{2}.csv'.format(biomarker,
                                                                                       num_samples, run))
        else:
            vgam_model_file = os.path.join(vgam_model_path,
                                           '{0}_model_{1}_longitudinal_csig{2}.0_{3}.csv'.format(biomarker,
                                                                                                 num_samples,
                                                                                                 csig, run))

        pm = ProgressionModel(biomarker, vgam_model_file)
        curve_vgam_median = pm.get_quantile_curve(progress_linspace, 0.5)
        curve_vgam_mean = [np.sum(pm.get_density_distribution(values, p) * values /
                                  np.sum(pm.get_density_distribution(values, p))) for p in progress_linspace]

        # Calculate errors
        errors_donohue.append(np.mean(np.abs(np.array(curve_donohue) - np.array(mean_curve))))
        errors_vgam_mean.append(np.mean(np.abs(np.array(curve_vgam_mean) - np.array(mean_curve))))
        errors_vgam_median.append(np.mean(np.abs(np.array(curve_vgam_median) - np.array(median_curve))))

        if show_plots:
            ax.plot(progr_donohue, curve_donohue, '--', color='g', alpha=0.2, linewidth=2)
            # ax.plot(progress_linspace, curve_vgam_median, '-', color='r', alpha=0.2, linewidth=2)
            ax.plot(progress_linspace, curve_vgam_mean, '--', color='r', alpha=0.2, linewidth=2)

    print log.RESULT, 'Donohue (mean):', np.mean(errors_donohue), np.var(errors_donohue)
    print log.RESULT, 'VGAM    (mean):', np.mean(errors_vgam_mean), np.var(errors_vgam_mean)
    print log.RESULT, 'VGAM  (median):', np.mean(errors_vgam_median), np.var(errors_vgam_median)

    # Draw or save the plot
    if show_plots:
        plt.tight_layout()
        plt.show()
        plt.close(fig)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'],
                                                         biomarkers=biomarkers,
                                                         select_training_set=True,
                                                         select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        print log.INFO, 'Generating quantile correlation plot for {0}...'.format(biomarker)
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))

        if os.path.isfile(q_file):
            (q_bl, q_m12) = pickle.load(open(q_file, 'rb'))
        else:
            q_bl = []
            q_m12 = []

            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            pickle.dump((q_bl, q_m12), open(q_file, 'wb'))

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')

        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)

        errors = q_bl - q_m12
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)
예제 #11
0
def plot_model(args, data_handler, biomarker):
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)
    plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names(
    )

    #
    # Read model
    #
    pm = ProgressionModel(biomarker,
                          model_file,
                          extrapolator=args.extrapolator)
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate,
                                        pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress,
                                        max_progress_extrapolate, 20)

    # Calc min and max val in interval between 1% and 99% percentie
    min_val, max_val = pm.get_value_range([0.1, 0.9])
    #     progress_linspace = np.linspace(min_progress_extrapolate, max_progress_extrapolate, 100)
    #     min_val = float('inf')
    #     max_val = float('-inf')
    #     for quantile in [0.1, 0.9]:
    #         curve = pm.get_quantile_curve(progress_linspace, quantile)
    #         min_val = min(min_val, np.min(curve))
    #         max_val = max(max_val, np.max(curve))

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    figure_width = 6 if args.no_densities or args.only_densities else 12
    fig = plt.figure(figsize=(figure_width, 5))
    if args.only_densities:
        ax1 = None
        ax2 = plt.subplot(1, 1, 1)
        pt.setup_axes(plt, ax2, xgrid=False, ygrid=False)
    elif args.no_densities:
        ax1 = plt.subplot(1, 1, 1)
        ax2 = None
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    else:
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
        pt.setup_axes(plt, ax2)

    if not args.only_densities:
        if args.no_model and not args.plot_synth_model:
            ax1.set_title('Aligned samples for {0}'.format(biomarker_string))
        else:
            ax1.set_title('Quantile curves for {0}'.format(biomarker_string))
        if args.phase == 'mciad':
            ax1.set_xlabel(
                'Disease progress (days before/after conversion to AD)')
        else:
            ax1.set_xlabel(
                'Disease progress (days before/after conversion to MCI)')
        ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
        if args.xlim is not None:
            ax1.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)
        if args.ylim is not None:
            ax1.set_ylim(args.ylim[0], args.ylim[1])

    #
    # Plot the percentile curves of the fitted model
    #
    if not args.no_model and not args.only_densities:
        ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
        ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
        for grey_value, quantile in zip(grey_values, quantiles):
            curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
            ax1.plot(progress_linspace_int, curve_int, color=grey_value)

            if not args.no_extrapolation:
                curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1,
                                                  quantile)
                curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2,
                                                  quantile)
                ax1.plot(progress_linspace_ex1,
                         curve_ex1,
                         '--',
                         color=grey_value)
                ax1.plot(progress_linspace_ex2,
                         curve_ex2,
                         '--',
                         color=grey_value)

            if args.plot_quantile_label:
                label = '$q={0}\%$'.format(quantile * 100)
                ax1.text(progress_linspace_int[-1] + 10,
                         curve_int[-1],
                         label,
                         fontsize=10)

        if args.plot_donohue:
            print 'Plotting Donohue'
            donohue_file = os.path.join(
                data_handler._conf.models_folder, 'donohue',
                'population_{0}.csv'.format(biomarker.replace(' ', '.')))
            if not os.path.isfile(donohue_file):
                print log.ERROR, 'Donohue model file not found: {0}'.format(
                    donohue_file)
                return

            r = mlab.csv2rec(donohue_file)
            if args.method == 'joint':
                offset = 2200
            else:
                offset = 300
            progrs = r[r.dtype.names[0]] * 30.44 + offset
            vals = r[r.dtype.names[1]]
            curve_donohue = []
            progr_donohue = []
            for p in progress_linspace_int:
                if progrs[0] < p < progrs[-1]:
                    i = 1
                    while p > progrs[i]:
                        i += 1
                    # TODO linear interpolation
                    progr_donohue.append(progrs[i])
                    curve_donohue.append(vals[i])
            ax1.plot(progr_donohue,
                     curve_donohue,
                     '--',
                     color='b',
                     linewidth=2)

    #
    # Plot synthetic model curve
    #
    if plot_synth_model:
        progress_linspace_synth = np.linspace(-2500, 2500, 100)
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        alphas = [0.4, 0.7, 1.0, 0.7, 0.4]
        for quantile, alpha in zip(quantiles, alphas):
            curve_synth = [
                SynthModel.get_distributed_value(biomarker, p, cdf=quantile)
                for p in progress_linspace_synth
            ]
            ax1.plot(progress_linspace_synth,
                     curve_synth,
                     color='b',
                     alpha=alpha)

    #
    # Plot predictor function
    #
    if args.plot_eta is not None and not args.only_densities:
        # Get second axis of plot 1
        ax1b = ax1.twinx()

        # Plot all progresses
        # ax1b.scatter(pm.all_progresses, pm.all_mus, facecolor='b', marker='o', edgecolor='none', alpha=0.2)
        ax1b.text(pm.progresses[-1],
                  pm.sigmas[-1],
                  '$\mu$',
                  color='b',
                  fontsize=11)

        # Plot binned progresses
        ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x')

        # Plot interpolated model
        mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int]
        ax1b.plot(progress_linspace_int, mus, color='b')

        if not args.no_extrapolation:
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1]
            ax1b.plot(progress_linspace_ex1, mus, '--', color='b')
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2]
            ax1b.plot(progress_linspace_ex2, mus, '--', color='b')
        if args.xlim is not None:
            ax1b.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot errors
    #
    if args.plot_errors and not args.only_densities:
        eval_file = model_file.replace('.csv', '_eval_cover.csv')
        if not os.path.isfile(eval_file):
            print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file)
        else:
            m = mlab.csv2rec(eval_file)
            progresses = m['progress']
            errors = m['error']

            # Get second axis of plot 1
            ax1b = ax1.twinx()
            # ax1b.set_ylim(0, max(150, 1.2 * np.max(errors)))
            ax1b.plot(progresses, errors, color='g', marker='x')
            ax1b.text(progresses[-1],
                      errors[-1],
                      'Discr.',
                      color='g',
                      fontsize=11)
            ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5)

            median_curve = pm.get_quantile_curve(progresses, 0.5)
            min_value = np.min(median_curve)
            max_value = np.max(median_curve)
            rect = mpl.patches.Rectangle((progresses[0], min_value),
                                         progresses[-1] - progresses[0],
                                         max_value - min_value,
                                         fc=(0.0, 0.5, 0.0, 0.1),
                                         ec=(0.0, 0.5, 0.0, 0.8),
                                         linewidth=1)
            ax1.add_patch(rect)

    #
    # Plot points
    #
    if not args.no_points and not args.only_densities:
        samples_file = data_handler.get_samples_file(biomarker)
        if not os.path.isfile(samples_file):
            print log.ERROR, 'Samples file not found: {0}'.format(samples_file)
        else:
            m = mlab.csv2rec(samples_file)
            progr_points = m['progress']
            value_points = m['value']
            # diagn_points = [0.5 if p < 0 else 1.0 for p in progr_points]
            diagn_points = m['diagnosis']
            diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5

            print log.INFO, 'Plotting {0} sample points...'.format(
                len(progr_points))
            ax1.scatter(progr_points,
                        value_points,
                        s=15.0,
                        c=diagn_points,
                        edgecolor='none',
                        vmin=0.0,
                        vmax=1.0,
                        cmap=pt.progression_cmap,
                        alpha=args.points_alpha)
            if args.phase == 'cnmci':
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_cn + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['CN', 'MCI']
            elif args.phase == 'mciad':
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_ad + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['MCI', 'AD']
            else:
                rects = [
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_cn + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_mci + (args.points_alpha, ),
                        linewidth=0),
                    mpl.patches.Rectangle(
                        (0, 0),
                        1,
                        1,
                        fc=pt.color_ad + (args.points_alpha, ),
                        linewidth=0)
                ]
                labels = ['CN', 'MCI', 'AD']
            legend = ax1.legend(rects,
                                labels,
                                fontsize=10,
                                ncol=len(rects),
                                loc='upper center',
                                framealpha=0.9)
            legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Plot PDFs
    #
    progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000] if args.phase == 'joint' else \
                    [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000]

    if args.phase == 'cnmci':
        vmin = -2000
        vmax = 6000
    elif args.phase == 'mciad':
        vmin = -6000
        vmax = 2000
    elif args.phase == 'joint':
        vmin = -2000
        vmax = 4000
    sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(vmin=vmin,
                                                           vmax=vmax),
                                     cmap=plt.get_cmap(pt.progression_cmap))

    if not args.no_sample_lines and not args.only_densities:
        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                ax1.axvline(progr,
                            color=sample_color,
                            linestyle=linestyle,
                            alpha=0.3)

    if not args.no_densities:
        ax2.set_title(
            'Probability density function for {0}'.format(biomarker_string))
        ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker))
        ax2.set_ylabel('Probability')
        if args.ylim is None:
            values = np.linspace(min_val, max_val, 250)
            ax2.set_xlim(min_val, max_val)
        else:
            values = np.linspace(args.ylim[0], args.ylim[1], 250)
            ax2.set_xlim(args.ylim[0], args.ylim[1])

        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                # sample_color = sample_cmap.to_rgba(progr_samples.index(progr))
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                probs = pm.get_density_distribution(values, progr)
                ax2.plot(values,
                         probs,
                         label=str(progr),
                         color=sample_color,
                         linestyle=linestyle)

                if plot_synth_model:
                    probs = [
                        SynthModel.get_probability(biomarker, progr, v)
                        for v in values
                    ]
                    ax2.plot(values, probs, color='b', linestyle='--')

        legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9)
        legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    if args.save_plots or args.plot_file is not None:
        if args.plot_file is not None:
            plot_filename = args.plot_file
        else:
            plot_filename = model_file.replace('.csv', '.pdf')
        plt.savefig(plot_filename, transparent=True)
    else:
        plt.show()
    plt.close(fig)
예제 #12
0
def get_fitting_data(args, data_handler_joint):
    biomarkers = data_handler_joint.get_biomarker_names()
    offsets = range(args.search_range[0], args.search_range[1], args.search_range[2])
    errors_file = os.path.join(data_handler_joint.get_eval_folder(),
                               'offset_errors_{0}.p'.format(args.extrapolator))
    if os.path.isfile(errors_file) and not args.recompute_errors:
        print log.INFO, 'Reading errors estimations from file {0}...'.format(errors_file)
        (errors, descriminativeness, overlap) = pickle.load(open(errors_file, 'rb'))
    else:
        data_handler_1 = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='cnmci')
        data_handler_2 = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='mciad')

        errors = np.zeros((len(biomarkers), len(offsets)))
        descriminativeness = np.zeros(len(biomarkers))
        overlap = []
        for i, biomarker in enumerate(biomarkers):
            # Get error matrix for all biomarkers and offsets
            model_file_1 = data_handler_1.get_model_file(biomarker)
            model_file_2 = data_handler_2.get_model_file(biomarker)
            if os.path.isfile(model_file_1) and os.path.isfile(model_file_2):
                print log.INFO, 'Analysing {0}...'.format(biomarker)

                # Get discriminativeness for all biomarkers as a scaling factor
                eval_file_1 = model_file_1.replace('.csv', '_eval_cover.csv')
                eval_file_2 = model_file_2.replace('.csv', '_eval_cover.csv')
                if os.path.isfile(eval_file_1) and os.path.isfile(eval_file_2):
                    descriminate_1 = np.mean(mlab.csv2rec(eval_file_1)['error'])
                    descriminate_2 = np.mean(mlab.csv2rec(eval_file_2)['error'])
                    descriminativeness[i] = 0.5 * (descriminate_1 + descriminate_2)
                else:
                    print log.WARNING, 'Evaluation file missing for {0}'.format(biomarker)
                    continue

                # Initialise models
                model_1 = ProgressionModel(biomarker, model_file_1, extrapolator=args.extrapolator)
                model_2 = ProgressionModel(biomarker, model_file_2, extrapolator=args.extrapolator)

                # Assemble errors for each offset
                min_val_1, max_val_1 = model_1.get_value_range([0.1, 0.9])
                min_val_2, max_val_2 = model_2.get_value_range([0.1, 0.9])
                values = np.linspace(min(min_val_1, min_val_2), max(max_val_1, max_val_2), 250)
                values_delta = (values.max() - values.min()) / len(values)
                for j, offset in enumerate(offsets):
                    dens_11 = np.array(model_1.get_density_distribution(values, offset + model_2.min_progress))
                    dens_12 = np.array(model_2.get_density_distribution(values, model_2.min_progress))

                    dens_21 = np.array(model_1.get_density_distribution(values, model_1.max_progress))
                    dens_22 = np.array(model_2.get_density_distribution(values, -offset + model_1.max_progress))

                    errors[i, j] = 0.5 * values_delta * (np.sum(np.abs(dens_11 - dens_12)) + np.sum(np.abs(dens_21 - dens_22)))

                # Get overlap
                overlap.append(model_1.max_progress - model_2.min_progress)

        overlap = np.mean(overlap)
        print log.INFO, 'Saving errors to file {0}...'.format(errors_file)
        pickle.dump((errors, descriminativeness, overlap), open(errors_file, 'wb'))

    return biomarkers, offsets, errors, descriminativeness, overlap
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b',
                        '--biomarkers',
                        nargs='+',
                        default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p',
                        '--phase',
                        default=None,
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots',
                        action='store_true',
                        default=False,
                        help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm12'],
        biomarkers=biomarkers,
        select_training_set=True,
        select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(),
                                       'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        print log.INFO, 'Generating quantile correlation plot for {0}...'.format(
            biomarker)
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))

        if os.path.isfile(q_file):
            (q_bl, q_m12) = pickle.load(open(q_file, 'rb'))
        else:
            q_bl = []
            q_m12 = []

            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            pickle.dump((q_bl, q_m12), open(q_file, 'wb'))

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')

        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)

        errors = q_bl - q_m12
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)
def get_biomarker_predictions(visits, predict_biomarker,
                              method=None, biomarkers=None, phase=None,
                              recompute_estimates=False, recompute_predictions=False, estimate_dprs=False,
                              select_test_set=False, consistent_data=False, exclude_cn=False,
                              use_last_visit=False, naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          method, '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          estimate_biomarkers_string,
                                                          '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits,
                                                                           method=method,
                                                                           biomarkers=biomarkers,
                                                                           phase=phase,
                                                                           recompute_estimates=recompute_estimates,
                                                                           estimate_dprs=estimate_dprs,
                                                                           select_test_set=select_test_set,
                                                                           consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit],
                                                             biomarkers=[predict_biomarker],
                                                             select_test_set=select_test_set,
                                                             select_complete=True)
        model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit]['scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr,
                                     value_model, value_naive,
                                     mean_quantile, mean_change, intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file)
        pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr):
    """
    Show the progression model of one biomarker together with the fitted sample values.

    The samples are taken from ``measurements[0]``; the scan time of every
    visit is mapped to a disease progress with ``scantime * dpr + dpi``.
    The figure is shown interactively and closed afterwards. If the model
    file does not exist, an error is logged and nothing is plotted.

    :param data_handler: the data handler used to locate the model file
    :param biomarker: the biomarker to plot
    :param measurements: the measurements containing the biomarker samples of one subject
    :param dpi: the estimated DPI
    :param dpr: the estimated DPR
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)

    #
    # Read model and derive the progress ranges, extended by 30% on both sides
    #
    model = ProgressionModel(biomarker, model_file)
    margin = 0.3 * (model.max_progress - model.min_progress)
    x_lower = int(model.min_progress - margin)
    x_upper = int(model.max_progress + margin)
    xs_before = np.linspace(x_lower, model.min_progress, 20)
    xs_inside = np.linspace(model.min_progress, model.max_progress, 60)
    xs_after = np.linspace(model.max_progress, x_upper, 20)

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    fig = plt.figure(figsize=(6, 5))
    ax1 = plt.subplot(1, 1, 1)
    pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    ax1.set_title('Model for {0} with fitted sample values'.format(biomarker_string))
    ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
    ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
    ax1.set_xlim(x_lower, x_upper)

    #
    # Plot the percentile curves of the fitted model
    #
    for border in (model.min_progress, model.max_progress):
        ax1.axvline(border, color='0.15', linestyle=':')

    # Each quantile is paired with its grey value; the median is drawn darkest
    styled_quantiles = [(0.1, '0.4'), (0.25, '0.2'), (0.5, '0'), (0.75, '0.2'), (0.9, '0.4')]
    for quantile, grey in styled_quantiles:
        # Solid curve within the model range
        ys_inside = model.get_quantile_curve(xs_inside, quantile)
        ax1.plot(xs_inside, ys_inside, color=grey)

        # Dashed curves for the extrapolated ranges
        ys_before = model.get_quantile_curve(xs_before, quantile)
        ys_after = model.get_quantile_curve(xs_after, quantile)
        ax1.plot(xs_before, ys_before, '--', color=grey)
        ax1.plot(xs_after, ys_after, '--', color=grey)

        ax1.text(xs_inside[-1] + 100, ys_inside[-1],
                 'q = {0}'.format(quantile * 100), fontsize=10)

    #
    # Plot points
    #
    subject = measurements[0]
    samples = [(visit,
                subject[visit]['scantime'] * dpr + dpi,
                subject[visit][biomarker])
               for visit in subject
               if biomarker in subject[visit]]

    # Mark every visit with a vertical line and its name
    for visit, progress, value in samples:
        ax1.axvline(progress, color='b', linestyle='--')
        ax1.text(progress + 150, value, visit, color='b', fontsize=10)

    ax1.scatter([progress for _, progress, _ in samples],
                [value for _, _, value in samples],
                s=25.0, color='b', edgecolor='none',
                vmin=0.0, vmax=1.0, alpha=0.9)

    #
    # Draw the plot
    #
    plt.tight_layout()
    plt.show()
    plt.close(fig)
def plot_model(args, data_handler, biomarker):
    """Plot the fitted progression model of a single biomarker.

    Depending on the flags in ``args`` the figure consists of a model panel
    (``ax1``: quantile curves, sample points, optional overlays) and/or a
    density panel (``ax2``: probability density functions of the biomarker
    value at a fixed set of progress values). The figure is either saved to
    a file or shown interactively, and is closed afterwards.

    Args:
        args: Option namespace. Attributes read by this function:
            ``extrapolator``, ``plot_synth_model``, ``no_densities``,
            ``only_densities``, ``no_model``, ``phase``, ``method``,
            ``xlim``, ``ylim``, ``no_extrapolation``,
            ``plot_quantile_label``, ``plot_donohue``, ``plot_eta``,
            ``plot_errors``, ``no_points``, ``points_alpha``,
            ``no_sample_lines``, ``save_plots`` and ``plot_file``.
        data_handler: Object providing ``get_model_file(biomarker)``,
            ``get_samples_file(biomarker)`` and ``_conf.models_folder``.
        biomarker: Name of the biomarker whose model is plotted.

    Returns:
        None. The function returns early (after printing an error) if the
        model file does not exist, or if ``args.plot_donohue`` is set and
        the Donohue model file does not exist.
    """
    model_file = data_handler.get_model_file(biomarker)
    if not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}'.format(model_file)
        return

    print log.INFO, 'Generating plot for {0}...'.format(biomarker)
    plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names()

    #
    # Read model
    #
    pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator)
    # The plotted range extends the modelled progress range by 30% on each side
    progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress)
    min_progress_extrapolate = int(pm.min_progress - progress_extrapolate)
    max_progress_extrapolate = int(pm.max_progress + progress_extrapolate)
    progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20)
    progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60)
    progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20)

    # Value range of the model for the quantiles 0.1 and 0.9; used as the
    # default x-range of the density panel
    min_val, max_val = pm.get_value_range([0.1, 0.9])

    #
    # Setup plot
    #
    biomarker_string = pt.get_biomarker_string(biomarker)
    figure_width = 6 if args.no_densities or args.only_densities else 12
    fig = plt.figure(figsize=(figure_width, 5))
    if args.only_densities:
        ax1 = None
        ax2 = plt.subplot(1, 1, 1)
        pt.setup_axes(plt, ax2, xgrid=False, ygrid=False)
    elif args.no_densities:
        ax1 = plt.subplot(1, 1, 1)
        ax2 = None
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
    else:
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        pt.setup_axes(plt, ax1, xgrid=False, ygrid=False)
        pt.setup_axes(plt, ax2)

    if not args.only_densities:
        if args.no_model and not args.plot_synth_model:
            ax1.set_title('Aligned samples for {0}'.format(biomarker_string))
        else:
            ax1.set_title('Quantile curves for {0}'.format(biomarker_string))
        if args.phase == 'mciad':
            ax1.set_xlabel('Disease progress (days before/after conversion to AD)')
        else:
            ax1.set_xlabel('Disease progress (days before/after conversion to MCI)')
        ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker))
        if args.xlim is not None:
            ax1.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate)
        if args.ylim is not None:
            ax1.set_ylim(args.ylim[0], args.ylim[1])

    #
    # Plot the quantile curves of the fitted model
    #
    if not args.no_model and not args.only_densities:
        # Dotted vertical lines mark the limits of the modelled progress range
        ax1.axvline(pm.min_progress, color='0.15', linestyle=':')
        ax1.axvline(pm.max_progress, color='0.15', linestyle=':')

        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        grey_values = ['0.4', '0.2', '0', '0.2', '0.4']
        for grey_value, quantile in zip(grey_values, quantiles):
            curve_int = pm.get_quantile_curve(progress_linspace_int, quantile)
            ax1.plot(progress_linspace_int, curve_int, color=grey_value)

            if not args.no_extrapolation:
                # Extrapolated parts of the curve are drawn dashed
                curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile)
                curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile)
                ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value)
                ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value)

            if args.plot_quantile_label:
                label = r'$q={0}\%$'.format(quantile * 100)
                ax1.text(progress_linspace_int[-1] + 10, curve_int[-1], label, fontsize=10)

        if args.plot_donohue:
            print 'Plotting Donohue'
            donohue_file = os.path.join(data_handler._conf.models_folder,
                                        'donohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.')))
            if not os.path.isfile(donohue_file):
                print log.ERROR, 'Donohue model file not found: {0}'.format(donohue_file)
                # Release the figure created above before bailing out
                plt.close(fig)
                return

            r = mlab.csv2rec(donohue_file)
            # Fixed shift applied to the Donohue progress axis
            # NOTE(review): the values 2200 / 300 look hand-tuned - confirm
            if args.method == 'joint':
                offset = 2200
            else:
                offset = 300
            # First column scaled by 30.44 (presumably months -> days - confirm)
            progrs = r[r.dtype.names[0]] * 30.44 + offset
            vals = r[r.dtype.names[1]]
            curve_donohue = []
            progr_donohue = []
            for p in progress_linspace_int:
                if progrs[0] < p < progrs[-1]:
                    # Pick the first sample at or after p (no interpolation)
                    i = 1
                    while p > progrs[i]:
                        i += 1
                    # TODO linear interpolation
                    progr_donohue.append(progrs[i])
                    curve_donohue.append(vals[i])
            ax1.plot(progr_donohue, curve_donohue, '--', color='b', linewidth=2)

    #
    # Plot synthetic model curve
    #
    # ax1 does not exist when only the densities are plotted, so the curves
    # can only be drawn if the model panel is present
    if plot_synth_model and not args.only_densities:
        progress_linspace_synth = np.linspace(-2500, 2500, 100)
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]
        alphas = [0.4, 0.7, 1.0, 0.7, 0.4]
        for quantile, alpha in zip(quantiles, alphas):
            curve_synth = [SynthModel.get_distributed_value(biomarker, p, cdf=quantile)
                           for p in progress_linspace_synth]
            ax1.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha)

    #
    # Plot predictor function
    #
    if args.plot_eta is not None and not args.only_densities:
        # Get second axis of plot 1
        ax1b = ax1.twinx()

        # NOTE(review): the value of args.plot_eta is not used; the sigmas
        # are always plotted while the text label reads mu - confirm intent
        ax1b.text(pm.progresses[-1], pm.sigmas[-1], r'$\mu$', color='b', fontsize=11)

        # Plot binned progresses
        ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x')

        # Plot interpolated model
        mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int]
        ax1b.plot(progress_linspace_int, mus, color='b')

        if not args.no_extrapolation:
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1]
            ax1b.plot(progress_linspace_ex1, mus, '--', color='b')
            mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2]
            ax1b.plot(progress_linspace_ex2, mus, '--', color='b')
        if args.xlim is not None:
            ax1b.set_xlim(args.xlim[0], args.xlim[1])
        else:
            ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate)

    #
    # Plot errors
    #
    if args.plot_errors and not args.only_densities:
        eval_file = model_file.replace('.csv', '_eval_cover.csv')
        if not os.path.isfile(eval_file):
            print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file)
        else:
            m = mlab.csv2rec(eval_file)
            progresses = m['progress']
            errors = m['error']

            # Get second axis of plot 1
            ax1b = ax1.twinx()
            ax1b.plot(progresses, errors, color='g', marker='x')
            ax1b.text(progresses[-1], errors[-1], 'Discr.', color='g', fontsize=11)
            ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5)

            # Green box spanning the evaluated progresses and the range of
            # the median curve over them
            median_curve = pm.get_quantile_curve(progresses, 0.5)
            min_value = np.min(median_curve)
            max_value = np.max(median_curve)
            rect = mpl.patches.Rectangle((progresses[0], min_value), progresses[-1] - progresses[0],
                                         max_value - min_value,
                                         fc=(0.0, 0.5, 0.0, 0.1), ec=(0.0, 0.5, 0.0, 0.8),
                                         linewidth=1)
            ax1.add_patch(rect)

    #
    # Plot points
    #
    if not args.no_points and not args.only_densities:
        samples_file = data_handler.get_samples_file(biomarker)
        if not os.path.isfile(samples_file):
            print log.ERROR, 'Samples file not found: {0}'.format(samples_file)
        else:
            m = mlab.csv2rec(samples_file)
            progr_points = m['progress']
            value_points = m['value']
            diagn_points = m['diagnosis']
            # Collapse all diagnosis codes in [0.25, 0.75] onto 0.5 so that
            # they share one colour
            diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5

            print log.INFO, 'Plotting {0} sample points...'.format(len(progr_points))
            ax1.scatter(progr_points, value_points, s=15.0, c=diagn_points, edgecolor='none',
                        vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=args.points_alpha)

            # Legend entries (label, base colour) for the plotted phase
            if args.phase == 'cnmci':
                legend_entries = [('CN', pt.color_cn), ('MCI', pt.color_mci)]
            elif args.phase == 'mciad':
                legend_entries = [('MCI', pt.color_mci), ('AD', pt.color_ad)]
            else:
                legend_entries = [('CN', pt.color_cn), ('MCI', pt.color_mci), ('AD', pt.color_ad)]
            # Proxy patches carrying the point alpha as fourth colour component
            rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=entry_color + (args.points_alpha,), linewidth=0)
                     for _, entry_color in legend_entries]
            labels = [entry_label for entry_label, _ in legend_entries]
            legend = ax1.legend(rects, labels, fontsize=10, ncol=len(rects), loc='upper center', framealpha=0.9)
            legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Plot PDFs
    #
    # Progress values at which vertical sample lines and densities are drawn
    if args.phase == 'joint':
        progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000]
    else:
        progr_samples = [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000]

    # Range of progress values mapped onto the colour map
    if args.phase == 'cnmci':
        vmin = -2000
        vmax = 6000
    elif args.phase == 'mciad':
        vmin = -6000
        vmax = 2000
    elif args.phase == 'joint':
        vmin = -2000
        vmax = 4000
    else:
        # Any other phase: span the sampled progresses instead of failing
        # with an unbound variable
        vmin = min(progr_samples)
        vmax = max(progr_samples)
    sample_cmap = cmx.ScalarMappable(
        norm=colors.Normalize(vmin=vmin, vmax=vmax),
        cmap=plt.get_cmap(pt.progression_cmap))

    if not args.no_sample_lines and not args.only_densities:
        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                sample_color = sample_cmap.to_rgba(progr)
                # Dashed if the progress lies outside the modelled range
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                ax1.axvline(progr, color=sample_color, linestyle=linestyle, alpha=0.3)

    if not args.no_densities:
        ax2.set_title('Probability density function for {0}'.format(biomarker_string))
        ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker))
        ax2.set_ylabel('Probability')
        # args.ylim (value range of the model panel) doubles as the x-range
        # of the density panel
        if args.ylim is None:
            values = np.linspace(min_val, max_val, 250)
            ax2.set_xlim(min_val, max_val)
        else:
            values = np.linspace(args.ylim[0], args.ylim[1], 250)
            ax2.set_xlim(args.ylim[0], args.ylim[1])

        for progr in progr_samples:
            if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress:
                sample_color = sample_cmap.to_rgba(progr)
                linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-'
                probs = pm.get_density_distribution(values, progr)
                ax2.plot(values, probs, label=str(progr), color=sample_color, linestyle=linestyle)

                if plot_synth_model:
                    probs = [SynthModel.get_probability(biomarker, progr, v) for v in values]
                    ax2.plot(values, probs, color='b', linestyle='--')

        legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9)
        legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    #
    # Draw or save the plot
    #
    plt.tight_layout()
    if args.save_plots or args.plot_file is not None:
        if args.plot_file is not None:
            plot_filename = args.plot_file
        else:
            plot_filename = model_file.replace('.csv', '.pdf')
        plt.savefig(plot_filename, transparent=True)
    else:
        plt.show()
    plt.close(fig)
def get_biomarker_predictions(visits,
                              predict_biomarker,
                              method=None,
                              biomarkers=None,
                              phase=None,
                              recompute_estimates=False,
                              recompute_predictions=False,
                              estimate_dprs=False,
                              select_test_set=False,
                              consistent_data=False,
                              exclude_cn=False,
                              use_last_visit=False,
                              naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, method, '_'.join(visits),
            '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, estimate_biomarkers_string,
            '_'.join(visits), '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(),
                                   predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(
            predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive,
         values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(
            predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                         'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(
            visits,
            method=method,
            biomarkers=biomarkers,
            phase=phase,
            recompute_estimates=recompute_estimates,
            estimate_dprs=estimate_dprs,
            select_test_set=select_test_set,
            consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(
            visits=visits + [predict_visit],
            biomarkers=[predict_biomarker],
            select_test_set=select_test_set,
            select_complete=True)
        model = ProgressionModel(
            predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(
            predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis,
                                            dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit][
                    'scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(
                    scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][
                    predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(
                        scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(
                            scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(
                            progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(
                    progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][
                    predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits,
                                     measurements[rid], dpi, dpr, value_model,
                                     value_naive, mean_quantile, mean_change,
                                     intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(
                    predict_biomarker, rid, value_observed, value_naive,
                    value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(
            predict_biomarker, prediction_file)
        pickle.dump(
            (rids, diagnoses, values_observed, values_naive, values_model),
            open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model