def evaluate_biomarker_cover(args, data_handler, biomarker):
    model_file = data_handler.get_model_file(biomarker)
    eval_file = model_file.replace('.csv', '_eval_{0}.csv'.format(args.metric))

    if os.path.isfile(eval_file) and not args.recompute_metric:
        print log.SKIP, 'Evaluation file already existing: {0}'.format(eval_file)
    elif not os.path.isfile(model_file):
        print log.ERROR, 'Model file not found: {0}!'.format(model_file)
    else:
        model = ProgressionModel(biomarker, model_file)

        # Determine value and progress interval
        progresses = np.linspace(model.min_progress, model.max_progress, args.progress_samples)
        median_curve = model.get_quantile_curve(progresses, 0.5)
        min_value = np.min(median_curve)
        max_value = np.max(median_curve)

        print log.INFO, 'Evaluating {0} steps in progress interval [{1}, {2}] for values in [{3}, {4}]...'.format(
            args.progress_samples, progresses[0], progresses[-1], min_value, max_value)

        # Compute error
        writer = csv.writer(open(eval_file, 'wb'), delimiter=',')
        writer.writerow(['progress', 'error'])

        # Compute error
        total_error = 0
        for progress in progresses:
            min_q = model.approximate_quantile(progress, min_value)
            max_q = model.approximate_quantile(progress, max_value)
            quantile_range = max_q - min_q
            total_error += quantile_range

            writer.writerow([progress, quantile_range])

        total_error /= len(progresses)
        print log.RESULT, 'Total error {0}: {1}'.format(biomarker, total_error)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--method',
                        choices=DataHandler.get_method_choices(),
                        default='all',
                        help='the method to collect data for')
    parser.add_argument('-b',
                        '--biomarkers',
                        nargs='+',
                        default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p',
                        '--phase',
                        default=None,
                        choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots',
                        action='store_true',
                        default=False,
                        help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm12'],
        biomarkers=biomarkers,
        select_training_set=True,
        select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(),
                                       'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        print log.INFO, 'Generating quantile correlation plot for {0}...'.format(
            biomarker)
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))

        if os.path.isfile(q_file):
            (q_bl, q_m12) = pickle.load(open(q_file, 'rb'))
        else:
            q_bl = []
            q_m12 = []

            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            pickle.dump((q_bl, q_m12), open(q_file, 'wb'))

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')

        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)

        errors = q_bl - q_m12
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)
def get_biomarker_predictions(visits,
                              predict_biomarker,
                              method=None,
                              biomarkers=None,
                              phase=None,
                              recompute_estimates=False,
                              recompute_predictions=False,
                              estimate_dprs=False,
                              select_test_set=False,
                              consistent_data=False,
                              exclude_cn=False,
                              use_last_visit=False,
                              naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, method, '_'.join(visits),
            '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(
            predict_biomarker_str, estimate_biomarkers_string,
            '_'.join(visits), '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(),
                                   predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(
            predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive,
         values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(
            predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                         'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(
            visits,
            method=method,
            biomarkers=biomarkers,
            phase=phase,
            recompute_estimates=recompute_estimates,
            estimate_dprs=estimate_dprs,
            select_test_set=select_test_set,
            consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(
            visits=visits + [predict_visit],
            biomarkers=[predict_biomarker],
            select_test_set=select_test_set,
            select_complete=True)
        model = ProgressionModel(
            predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(
            predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis,
                                            dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit][
                    'scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(
                    scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][
                    predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(
                        scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(
                            scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(
                            progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(
                    progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][
                    predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits,
                                     measurements[rid], dpi, dpr, value_model,
                                     value_naive, mean_quantile, mean_change,
                                     intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(
                    predict_biomarker, rid, value_observed, value_naive,
                    value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(
            predict_biomarker, prediction_file)
        pickle.dump(
            (rids, diagnoses, values_observed, values_naive, values_model),
            open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model
def get_biomarker_predictions(visits, predict_biomarker,
                              method=None, biomarkers=None, phase=None,
                              recompute_estimates=False, recompute_predictions=False, estimate_dprs=False,
                              select_test_set=False, consistent_data=False, exclude_cn=False,
                              use_last_visit=False, naive_use_diagnosis=False):

    # Get prediction file
    data_handler = DataHandler.get_data_handler(method=method,
                                                biomarkers=biomarkers,
                                                phase=phase)
    predict_biomarker_str = predict_biomarker.replace(' ', '_')
    predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p'
    if biomarkers is None:
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          method, '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    else:
        estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_')
        predict_file_basename = predict_file_trunk.format(predict_biomarker_str,
                                                          estimate_biomarkers_string,
                                                          '_'.join(visits),
                                                          '_last' if use_last_visit else '')
    prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename)

    # Read if predictions exist, else recompute
    if os.path.isfile(prediction_file) and not recompute_predictions:
        # Read biomarker predictions from file
        print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file)
        (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb'))
    else:
        predict_visit = get_predicted_visit(visits)
        print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit)

        # Get mean changes from file
        mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
        if not os.path.isfile(mean_changes_file):
            print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!'
        mean_changes = pickle.load(open(mean_changes_file, 'rb'))

        # Get DPI estimates
        rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits,
                                                                           method=method,
                                                                           biomarkers=biomarkers,
                                                                           phase=phase,
                                                                           recompute_estimates=recompute_estimates,
                                                                           estimate_dprs=estimate_dprs,
                                                                           select_test_set=select_test_set,
                                                                           consistent_data=consistent_data)

        # Collect biomarker data for test
        measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit],
                                                             biomarkers=[predict_biomarker],
                                                             select_test_set=select_test_set,
                                                             select_complete=True)
        model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker))

        print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit)
        rids = []
        diagnoses = []
        values_observed = []
        values_model = []
        values_naive = []
        for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs):
            if rid in measurements:
                # Get real biomarker value value at next visit
                scantime_first_visit = measurements[rid][visits[0]]['scantime']
                scantime_next_visit = measurements[rid][predict_visit]['scantime']
                progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr)
                value_observed = measurements[rid][predict_visit][predict_biomarker]
                values_observed.append(value_observed)

                # Predict biomarker value value at next visit
                if use_last_visit:
                    value = measurements[rid][visits[-1]][predict_biomarker]
                    scantime = measurements[rid][visits[-1]]['scantime']
                    progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                    mean_quantile = model.approximate_quantile(progress, value)
                else:
                    mean_quantile = 0.0
                    for visit in visits:
                        value = measurements[rid][visit][predict_biomarker]
                        scantime = measurements[rid][visit]['scantime']
                        progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr)
                        mean_quantile += model.approximate_quantile(progress, value)
                    mean_quantile /= len(visits)

                value_model = model.get_value_at_quantile(progress_next_visit, mean_quantile)
                values_model.append(value_model)

                # Predict biomarker value naively
                if naive_use_diagnosis:
                    mean_change = mean_changes[predict_biomarker][diagnosis]
                else:
                    mean_change = mean_changes[predict_biomarker][0.66]

                if use_last_visit:
                    x = measurements[rid][visits[-1]]['scantime']
                    y = measurements[rid][visits[-1]][predict_biomarker]
                    intercept = -(mean_change * x - y)
                else:
                    x = np.zeros(len(visits))
                    y = np.zeros(len(visits))
                    for i, visit in enumerate(visits):
                        x[i] = measurements[rid][visit]['scantime']
                        y[i] = measurements[rid][visit][predict_biomarker]
                    intercept = -np.sum(mean_change * x - y) / len(x)

                value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime']
                values_naive.append(value_naive)

                # Plot estimates
                plot = True
                if plot and diagnosis > 0.0 and dpr > 0.0:
                    plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr,
                                     value_model, value_naive,
                                     mean_quantile, mean_change, intercept, rid)

                # Append rid and diagnosis
                rids.append(rid)
                diagnoses.append(diagnosis)

                # Print result
                print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model)

        # Save results
        print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file)
        pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb'))

    rids = np.array(rids)
    diagnoses = np.array(diagnoses)
    values_observed = np.array(values_observed)
    values_naive = np.array(values_naive)
    values_model = np.array(values_model)

    # Exclude healthy subjects
    if exclude_cn:
        indices = np.where(diagnoses > 0.25)
        rids = rids[indices]
        diagnoses = diagnoses[indices]
        values_observed = values_observed[indices]
        values_naive = values_naive[indices]
        values_model = values_model[indices]

    return rids, diagnoses, values_observed, values_naive, values_model
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'],
                                                         biomarkers=biomarkers,
                                                         select_training_set=True,
                                                         select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        print log.INFO, 'Generating quantile correlation plot for {0}...'.format(biomarker)
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))

        if os.path.isfile(q_file):
            (q_bl, q_m12) = pickle.load(open(q_file, 'rb'))
        else:
            q_bl = []
            q_m12 = []

            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            pickle.dump((q_bl, q_m12), open(q_file, 'wb'))

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')

        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)

        errors = q_bl - q_m12
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)