def get_model_differences(args, data_handler, biomarker, offsets): print log.INFO, 'Comparing models for {0}...'.format(biomarker) model_file = data_handler.get_model_file(biomarker) print model_file if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return donohue_model_file = os.path.join(data_handler._conf.models_folder, 'denohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.'))) if not os.path.isfile(donohue_model_file): print log.ERROR, 'Donohue Model file not found: {0}'.format(model_file) return # Read Donohue model r = mlab.csv2rec(donohue_model_file) progrs = r[r.dtype.names[0]] * 30.44 progrs = progrs[::100] vals_donohue = r[r.dtype.names[1]] vals_donohue = vals_donohue[::100] # Read my model pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator) diffs = np.empty(len(offsets)) for i, offset in enumerate(offsets): vals_mine = pm.get_quantile_curve(progrs + offset, 0.5) normalizer = max(np.max(vals_mine), np.max(vals_donohue)) diffs[i] = np.mean(np.abs(vals_donohue - vals_mine)) /normalizer return diffs
def get_model_differences(args, data_handler, biomarker, offsets): print log.INFO, 'Comparing models for {0}...'.format(biomarker) model_file = data_handler.get_model_file(biomarker) print model_file if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return donohue_model_file = os.path.join( data_handler._conf.models_folder, 'denohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.'))) if not os.path.isfile(donohue_model_file): print log.ERROR, 'Donohue Model file not found: {0}'.format(model_file) return # Read Donohue model r = mlab.csv2rec(donohue_model_file) progrs = r[r.dtype.names[0]] * 30.44 progrs = progrs[::100] vals_donohue = r[r.dtype.names[1]] vals_donohue = vals_donohue[::100] # Read my model pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator) diffs = np.empty(len(offsets)) for i, offset in enumerate(offsets): vals_mine = pm.get_quantile_curve(progrs + offset, 0.5) normalizer = max(np.max(vals_mine), np.max(vals_donohue)) diffs[i] = np.mean(np.abs(vals_donohue - vals_mine)) / normalizer return diffs
def evaluate_synth_model(model_file, biomarker, progress_linspace, number_of_value_steps, metric='area'): # Define progress steps pm = ProgressionModel(biomarker, model_file) progresses = np.linspace(progress_linspace[0], progress_linspace[1], progress_linspace[2]) # Define value steps min_val = float('inf') max_val = float('-inf') for quantile in [0.01, 0.99]: curve = pm.get_quantile_curve(progresses, quantile) min_val = min(min_val, np.min(curve)) max_val = max(max_val, np.max(curve)) values = np.linspace(min_val, max_val, number_of_value_steps) # Get mean error error = 0 if metric == 'area': for progr in progresses: probs_model = [SynthModel.get_probability(biomarker, progr, v) for v in values] probs_fit = pm.get_density_distribution(values, progr) error += np.sum(np.abs(np.array(probs_fit) - np.array(probs_model))) error *= (values[1] - values[0]) / len(progresses) elif metric == 'peakdist': for progr in progresses: probs_model = [SynthModel.get_probability(biomarker, progr, v) for v in values] probs_fit = pm.get_density_distribution(values, progr) peak_model = values[np.argsort(probs_model)[-1]] peak_fit = values[np.argsort(probs_fit)[-1]] error += np.abs(peak_fit - peak_model) error /= len(progresses) elif metric == 'maxdist': for value in values: probs_model = [SynthModel.get_probability(biomarker, p, value) for p in progresses] probs_fit = [pm.get_density_distribution([value], p) for p in progresses] max_model = progresses[np.argsort(probs_model)[-1]] max_fit = progresses[np.argsort(probs_fit)[-1]] error += np.abs(max_fit - max_model) error /= len(values) else: print log.ERROR, 'Metric unknown: {0}'.format(metric) return error
def evaluate_biomarker_disc(args, data_handler, biomarker): model_file = data_handler.get_model_file(biomarker) eval_file = model_file.replace('.csv', '_eval_{0}.csv'.format(args.metric)) if os.path.isfile(eval_file): print log.SKIP, 'Evaluation file already existing: {0}'.format(eval_file) elif not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}!'.format(model_file) else: model = ProgressionModel(biomarker, model_file) fitter = ModelFitter(model) # Determine value and progress interval min_value, max_value = model.get_value_range(quantiles=args.quantiles) values = np.linspace(min_value, max_value, args.value_samples) progresses = np.linspace(model.min_progress, model.max_progress, args.progress_samples) print log.INFO, 'Evaluating {0} steps in value interval [{1}, {2}]...'.format(args.value_samples, min_value, max_value) print log.INFO, 'Evaluating {0} steps in progress interval [{1}, {2}]...'.format(args.progress_samples, model.min_progress, model.max_progress) value_step = values[1] - values[0] # Compute error writer = csv.writer(open(eval_file, 'wb'), delimiter=',') writer.writerow(['progress', 'error']) total_error = 0 for progress in progresses: sample_error = 0 for value in values: prob_value = model.get_probability_value(value, progress) samples = {'bl': {'scantime': 0, biomarker: value}} estimated_dpi = fitter.get_dpi_for_samples(samples, phase=args.args) sample_error += prob_value * np.square(progress - estimated_dpi) sample_error = math.sqrt(value_step * sample_error / len(values)) total_error += sample_error writer.writerow([progress, sample_error]) print log.RESULT, 'Error for progress {0}: {1}'.format(progress, sample_error) total_error /= len(progresses) print log.RESULT, 'Total error: {0}'.format(total_error)
def evaluate_synth_model(model_file, biomarker, progress_linspace, number_of_value_steps, metric='area'): # Define progress steps pm = ProgressionModel(biomarker, model_file) progresses = np.linspace(progress_linspace[0], progress_linspace[1], progress_linspace[2]) # Define value steps min_val = float('inf') max_val = float('-inf') for quantile in [0.01, 0.99]: curve = pm.get_quantile_curve(progresses, quantile) min_val = min(min_val, np.min(curve)) max_val = max(max_val, np.max(curve)) values = np.linspace(min_val, max_val, number_of_value_steps) # Get mean error error = 0 if metric == 'area': for progr in progresses: probs_model = [ SynthModel.get_probability(biomarker, progr, v) for v in values ] probs_fit = pm.get_density_distribution(values, progr) error += np.sum( np.abs(np.array(probs_fit) - np.array(probs_model))) error *= (values[1] - values[0]) / len(progresses) elif metric == 'peakdist': for progr in progresses: probs_model = [ SynthModel.get_probability(biomarker, progr, v) for v in values ] probs_fit = pm.get_density_distribution(values, progr) peak_model = values[np.argsort(probs_model)[-1]] peak_fit = values[np.argsort(probs_fit)[-1]] error += np.abs(peak_fit - peak_model) error /= len(progresses) elif metric == 'maxdist': for value in values: probs_model = [ SynthModel.get_probability(biomarker, p, value) for p in progresses ] probs_fit = [ pm.get_density_distribution([value], p) for p in progresses ] max_model = progresses[np.argsort(probs_model)[-1]] max_fit = progresses[np.argsort(probs_fit)[-1]] error += np.abs(max_fit - max_model) error /= len(values) else: print log.ERROR, 'Metric unknown: {0}'.format(metric) return error
def evaluate_biomarker_cover(args, data_handler, biomarker): model_file = data_handler.get_model_file(biomarker) eval_file = model_file.replace('.csv', '_eval_{0}.csv'.format(args.metric)) if os.path.isfile(eval_file) and not args.recompute_metric: print log.SKIP, 'Evaluation file already existing: {0}'.format(eval_file) elif not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}!'.format(model_file) else: model = ProgressionModel(biomarker, model_file) # Determine value and progress interval progresses = np.linspace(model.min_progress, model.max_progress, args.progress_samples) median_curve = model.get_quantile_curve(progresses, 0.5) min_value = np.min(median_curve) max_value = np.max(median_curve) print log.INFO, 'Evaluating {0} steps in progress interval [{1}, {2}] for values in [{3}, {4}]...'.format( args.progress_samples, progresses[0], progresses[-1], min_value, max_value) # Compute error writer = csv.writer(open(eval_file, 'wb'), delimiter=',') writer.writerow(['progress', 'error']) # Compute error total_error = 0 for progress in progresses: min_q = model.approximate_quantile(progress, min_value) max_q = model.approximate_quantile(progress, max_value) quantile_range = max_q - min_q total_error += quantile_range writer.writerow([progress, quantile_range]) total_error /= len(progresses) print log.RESULT, 'Total error {0}: {1}'.format(biomarker, total_error)
def evaluate_experiment(args, biomarker, sampling, viscodes=[0]): print log.INFO, 'Evaluating {0} model with {1} samples...'.format(biomarker, args.number_of_training_samples) num_visits = max(viscodes) + 1 errors_experiment = [] for run in xrange(args.number_of_runs): data_handler = SynthDataHandler() model_file = data_handler.get_model_file(biomarker, num_samples=args.number_of_training_samples, sampling=sampling, run=run) error_folder = SynthDataHandler.make_dir(data_handler.get_eval_folder(), biomarker) error_file = os.path.join(error_folder, os.path.basename(model_file).replace('.csv', '_test.p')) if num_visits > 1: error_file = error_file.replace('_test.p', 'v{0}_test.p'.format(num_visits)) if os.path.isfile(error_file) and not args.recompute_errors: print log.SKIP, 'Skipping error computation for {0} samples {1}, run {2}'.format( args.number_of_training_samples, sampling, run) errors_run = pickle.load(open(error_file, 'rb')) else: # Generate model st.generate_synth_model(biomarker, recompute_models=args.recompute_models, num_samples=args.number_of_training_samples, sampling=sampling, run=run) # Initialise fitter fitter = ModelFitter(ProgressionModel(biomarker, model_file)) # Generate test data test_data = st.generate_synth_test_data([biomarker], args.number_of_test_samples, num_visits, run, recompute_test_data=args.recompute_test_data) errors_run = st.evaluate_synth_fitting(fitter, test_data, [biomarker], viscodes) pickle.dump(errors_run, open(error_file, 'wb')) errors_experiment.append(np.mean(errors_run)) return errors_experiment
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr): """ Plot the model of one biomarker with the fitted values :param data_handler: the data handler :param biomarker: the biomarker to plot :param measurements: the measurements containing the biomarker samples of one subject :param dpi: the estimated DPI :param dpr: the estimated DPR """ model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) # # Read model # pm = ProgressionModel(biomarker, model_file) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 fig = plt.figure(figsize=(figure_width, 5)) ax1 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) ax1.set_title( 'Model for {0} with fitted sample values'.format(biomarker_string)) ax1.set_xlabel('Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot the percentile curves of the fitted model # ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, color=grey_value) curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) label = 'q = {0}'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 100, curve_int[-1], label, fontsize=10) # # Plot points # progr_points = [] value_points = [] diagn_points = [] for visit in measurements[0]: if biomarker in measurements[0][visit]: progress = measurements[0][visit]['scantime'] * dpr + dpi value = measurements[0][visit][biomarker] progr_points.append(progress) value_points.append(value) diagn_points.append(1.0) ax1.axvline(progress, color='b', linestyle='--') ax1.text(progress + 150, value, visit, color='b', fontsize=10) ax1.scatter(progr_points, value_points, s=25.0, color='b', edgecolor='none', vmin=0.0, vmax=1.0, alpha=0.9) # # Draw or save the plot # plt.tight_layout() plt.show() plt.close(fig)
def evaluate_curves(biomarker_name, num_samples=200, show_plots=False, csig=0): biomarker = 'synth_{0}'.format(biomarker_name) print log.INFO, 'Evaluating {0} for {1} samples, csig={2}...'.format(biomarker, num_samples, csig) donohue_model_path = '/Development/DiseaseProgressionModel/models/donohue/' vgam_model_path = '/Development/DiseaseProgressionModel/models/synth/' # Setup plot if show_plots: fig = plt.figure() ax = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax, xgrid=False, ygrid=False) ax.set_title('') ax.set_xlabel('') else: fig = None ax = None # Initialise values offset_donohue = 0 # 182.5 errors_donohue = [] errors_vgam_mean = [] errors_vgam_median = [] # Get real curve values progress_linspace = np.linspace(-1500, 1500) mean_curve = [SynthModel.get_mean_value(biomarker, p) for p in progress_linspace] median_curve = [SynthModel.get_distributed_value(biomarker, p, cdf=0.5) for p in progress_linspace] # Plot synthetic model curve if show_plots: progress_linspace_synth = np.linspace(-2500, 2500, 100) quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] alphas = [0.4, 0.7, 1.0, 0.7, 0.4] for quantile, alpha in zip(quantiles, alphas): curve_synth = [SynthModel.get_distributed_value(biomarker, p, cdf=quantile) for p in progress_linspace_synth] ax.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha) curve_synth = [SynthModel.get_mean_value(biomarker, p) for p in progress_linspace_synth] ax.plot(progress_linspace_synth, curve_synth, '--', color='b') # Get values for mean calculation end_values = [SynthModel.get_distributed_value(biomarker, progress_linspace[0], cdf=0.001), SynthModel.get_distributed_value(biomarker, progress_linspace[-1], cdf=0.001), SynthModel.get_distributed_value(biomarker, progress_linspace[0], cdf=0.999), SynthModel.get_distributed_value(biomarker, progress_linspace[-1], cdf=0.999)] values = np.linspace(min(end_values), max(end_values), 100) for run in range(100): # Get Donohue model donohue_file = os.path.join(donohue_model_path, 'population_value-{0}_csig{1}_run{2}.csv'.format(biomarker_name, csig, run)) r = mlab.csv2rec(donohue_file) progrs = r[r.dtype.names[0]] - offset_donohue vals = r[r.dtype.names[1]] curve_donohue = [] progr_donohue = [] for p in progress_linspace: if progrs[0] < p < progrs[-1]: i = 1 while p > progrs[i]: i += 1 progr_donohue.append(float(progrs[i])) curve_donohue.append(float(vals[i])) else: print log.WARNING, 'Model scope too small... skipping!' continue # Get VGAM model if csig == 0: vgam_model_file = os.path.join(vgam_model_path, '{0}_model_{1}_longitudinal_{2}.csv'.format(biomarker, num_samples, run)) else: vgam_model_file = os.path.join(vgam_model_path, '{0}_model_{1}_longitudinal_csig{2}.0_{3}.csv'.format(biomarker, num_samples, csig, run)) pm = ProgressionModel(biomarker, vgam_model_file) curve_vgam_median = pm.get_quantile_curve(progress_linspace, 0.5) curve_vgam_mean = [np.sum(pm.get_density_distribution(values, p) * values / np.sum(pm.get_density_distribution(values, p))) for p in progress_linspace] # Calculate errors errors_donohue.append(np.mean(np.abs(np.array(curve_donohue) - np.array(mean_curve)))) errors_vgam_mean.append(np.mean(np.abs(np.array(curve_vgam_mean) - np.array(mean_curve)))) errors_vgam_median.append(np.mean(np.abs(np.array(curve_vgam_median) - np.array(median_curve)))) if show_plots: ax.plot(progr_donohue, curve_donohue, '--', color='g', alpha=0.2, linewidth=2) # ax.plot(progress_linspace, curve_vgam_median, '-', color='r', alpha=0.2, linewidth=2) ax.plot(progress_linspace, curve_vgam_mean, '--', color='r', alpha=0.2, linewidth=2) print log.RESULT, 'Donohue (mean):', np.mean(errors_donohue), np.var(errors_donohue) print log.RESULT, 'VGAM (mean):', np.mean(errors_vgam_mean), np.var(errors_vgam_mean) print log.RESULT, 'VGAM (median):', np.mean(errors_vgam_median), np.var(errors_vgam_median) # Draw or save the plot if show_plots: plt.tight_layout() plt.show() plt.close(fig)
def main(): parser = argparse.ArgumentParser() parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename') args = parser.parse_args() # Collect data for test data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'], biomarkers=biomarkers, select_training_set=True, select_complete=True) # Setup plotting folder eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants') # Process all biomarkers for biomarker in biomarkers: print log.INFO, 'Generating quantile correlation plot for {0}...'.format(biomarker) model_file = data_handler.get_model_file(biomarker) pm = ProgressionModel(biomarker, model_file) q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker)) if os.path.isfile(q_file): (q_bl, q_m12) = pickle.load(open(q_file, 'rb')) else: q_bl = [] q_m12 = [] for rid in measurements: val_bl = measurements[rid]['bl'][biomarker] val_m12 = measurements[rid]['m12'][biomarker] p_bl = measurements[rid]['bl']['progress'] p_m12 = measurements[rid]['m12']['progress'] q_bl.append(pm.approximate_quantile(p_bl, val_bl)) q_m12.append(pm.approximate_quantile(p_m12, val_m12)) pickle.dump((q_bl, q_m12), open(q_file, 'wb')) # Setup plot fig, axs = plt.subplots(1, 2) plt.suptitle('Correlation between bl and m12 quantiles') # Plot 1 ax = axs[0] pt.setup_axes(plt, ax, yspine=True) ax.set_xlabel('Quantile bl') ax.set_ylabel('Quantile m12') ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5) # Plot 2 q_bl = np.array(q_bl) q_m12 = np.array(q_m12) errors = q_bl - q_m12 loc, scale = norm.fit(errors, floc=0.0) ax = axs[1] pt.setup_axes(plt, ax) ax.set_xlabel('Difference bl to m12') ax.set_ylabel('Probability') ax.set_xlim(-1.05, 1.05) ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3) x = np.linspace(-1.0, 1.0, 100) ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k') # Draw or save the plot plt.tight_layout() if args.save_plots: plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker)) plt.savefig(plot_file, transparent=True) else: plt.show() plt.close(fig)
def plot_model(args, data_handler, biomarker): model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names( ) # # Read model # pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # Calc min and max val in interval between 1% and 99% percentie min_val, max_val = pm.get_value_range([0.1, 0.9]) # progress_linspace = np.linspace(min_progress_extrapolate, max_progress_extrapolate, 100) # min_val = float('inf') # max_val = float('-inf') # for quantile in [0.1, 0.9]: # curve = pm.get_quantile_curve(progress_linspace, quantile) # min_val = min(min_val, np.min(curve)) # max_val = max(max_val, np.max(curve)) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 if args.no_densities or args.only_densities else 12 fig = plt.figure(figsize=(figure_width, 5)) if args.only_densities: ax1 = None ax2 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax2, xgrid=False, ygrid=False) elif args.no_densities: ax1 = plt.subplot(1, 1, 1) ax2 = None pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) else: ax1 = plt.subplot(1, 2, 1) ax2 = plt.subplot(1, 2, 2) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) pt.setup_axes(plt, ax2) if not args.only_densities: if args.no_model and not args.plot_synth_model: ax1.set_title('Aligned samples for {0}'.format(biomarker_string)) else: ax1.set_title('Quantile curves for {0}'.format(biomarker_string)) if args.phase == 'mciad': ax1.set_xlabel( 'Disease progress (days before/after conversion to AD)') else: ax1.set_xlabel( 'Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) if args.xlim is not None: ax1.set_xlim(args.xlim[0], args.xlim[1]) else: ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) if args.ylim is not None: ax1.set_ylim(args.ylim[0], args.ylim[1]) # # Plot the percentile curves of the fitted model # if not args.no_model and not args.only_densities: ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, color=grey_value) if not args.no_extrapolation: curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) if args.plot_quantile_label: label = '$q={0}\%$'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 10, curve_int[-1], label, fontsize=10) if args.plot_donohue: print 'Plotting Donohue' donohue_file = os.path.join( data_handler._conf.models_folder, 'donohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.'))) if not os.path.isfile(donohue_file): print log.ERROR, 'Donohue model file not found: {0}'.format( donohue_file) return r = mlab.csv2rec(donohue_file) if args.method == 'joint': offset = 2200 else: offset = 300 progrs = r[r.dtype.names[0]] * 30.44 + offset vals = r[r.dtype.names[1]] curve_donohue = [] progr_donohue = [] for p in progress_linspace_int: if progrs[0] < p < progrs[-1]: i = 1 while p > progrs[i]: i += 1 # TODO linear interpolation progr_donohue.append(progrs[i]) curve_donohue.append(vals[i]) ax1.plot(progr_donohue, curve_donohue, '--', color='b', linewidth=2) # # Plot synthetic model curve # if plot_synth_model: progress_linspace_synth = np.linspace(-2500, 2500, 100) quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] alphas = [0.4, 0.7, 1.0, 0.7, 0.4] for quantile, alpha in zip(quantiles, alphas): curve_synth = [ SynthModel.get_distributed_value(biomarker, p, cdf=quantile) for p in progress_linspace_synth ] ax1.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha) # # Plot predictor function # if args.plot_eta is not None and not args.only_densities: # Get second axis of plot 1 ax1b = ax1.twinx() # Plot all progresses # ax1b.scatter(pm.all_progresses, pm.all_mus, facecolor='b', marker='o', edgecolor='none', alpha=0.2) ax1b.text(pm.progresses[-1], pm.sigmas[-1], '$\mu$', color='b', fontsize=11) # Plot binned progresses ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x') # Plot interpolated model mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int] ax1b.plot(progress_linspace_int, mus, color='b') if not args.no_extrapolation: mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1] ax1b.plot(progress_linspace_ex1, mus, '--', color='b') mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2] ax1b.plot(progress_linspace_ex2, mus, '--', color='b') if args.xlim is not None: ax1b.set_xlim(args.xlim[0], args.xlim[1]) else: ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot errors # if args.plot_errors and not args.only_densities: eval_file = model_file.replace('.csv', '_eval_cover.csv') if not os.path.isfile(eval_file): print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file) else: m = mlab.csv2rec(eval_file) progresses = m['progress'] errors = m['error'] # Get second axis of plot 1 ax1b = ax1.twinx() # ax1b.set_ylim(0, max(150, 1.2 * np.max(errors))) ax1b.plot(progresses, errors, color='g', marker='x') ax1b.text(progresses[-1], errors[-1], 'Discr.', color='g', fontsize=11) ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5) median_curve = pm.get_quantile_curve(progresses, 0.5) min_value = np.min(median_curve) max_value = np.max(median_curve) rect = mpl.patches.Rectangle((progresses[0], min_value), progresses[-1] - progresses[0], max_value - min_value, fc=(0.0, 0.5, 0.0, 0.1), ec=(0.0, 0.5, 0.0, 0.8), linewidth=1) ax1.add_patch(rect) # # Plot points # if not args.no_points and not args.only_densities: samples_file = data_handler.get_samples_file(biomarker) if not os.path.isfile(samples_file): print log.ERROR, 'Samples file not found: {0}'.format(samples_file) else: m = mlab.csv2rec(samples_file) progr_points = m['progress'] value_points = m['value'] # diagn_points = [0.5 if p < 0 else 1.0 for p in progr_points] diagn_points = m['diagnosis'] diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5 print log.INFO, 'Plotting {0} sample points...'.format( len(progr_points)) ax1.scatter(progr_points, value_points, s=15.0, c=diagn_points, edgecolor='none', vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=args.points_alpha) if args.phase == 'cnmci': rects = [ mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha, ), linewidth=0), mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha, ), linewidth=0) ] labels = ['CN', 'MCI'] elif args.phase == 'mciad': rects = [ mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha, ), linewidth=0), mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha, ), linewidth=0) ] labels = ['MCI', 'AD'] else: rects = [ mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha, ), linewidth=0), mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha, ), linewidth=0), mpl.patches.Rectangle( (0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha, ), linewidth=0) ] labels = ['CN', 'MCI', 'AD'] legend = ax1.legend(rects, labels, fontsize=10, ncol=len(rects), loc='upper center', framealpha=0.9) legend.get_frame().set_edgecolor((0.6, 0.6, 0.6)) # # Plot PDFs # progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000] if args.phase == 'joint' else \ [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000] if args.phase == 'cnmci': vmin = -2000 vmax = 6000 elif args.phase == 'mciad': vmin = -6000 vmax = 2000 elif args.phase == 'joint': vmin = -2000 vmax = 4000 sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(vmin=vmin, vmax=vmax), cmap=plt.get_cmap(pt.progression_cmap)) if not args.no_sample_lines and not args.only_densities: for progr in progr_samples: if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress: # sample_color = sample_cmap.to_rgba(progr_samples.index(progr)) sample_color = sample_cmap.to_rgba(progr) linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-' ax1.axvline(progr, color=sample_color, linestyle=linestyle, alpha=0.3) if not args.no_densities: ax2.set_title( 'Probability density function for {0}'.format(biomarker_string)) ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker)) ax2.set_ylabel('Probability') if args.ylim is None: values = np.linspace(min_val, max_val, 250) ax2.set_xlim(min_val, max_val) else: values = np.linspace(args.ylim[0], args.ylim[1], 250) ax2.set_xlim(args.ylim[0], args.ylim[1]) for progr in progr_samples: if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress: # sample_color = sample_cmap.to_rgba(progr_samples.index(progr)) sample_color = sample_cmap.to_rgba(progr) linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-' probs = pm.get_density_distribution(values, progr) ax2.plot(values, probs, label=str(progr), color=sample_color, linestyle=linestyle) if plot_synth_model: probs = [ SynthModel.get_probability(biomarker, progr, v) for v in values ] ax2.plot(values, probs, color='b', linestyle='--') legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9) legend.get_frame().set_edgecolor((0.6, 0.6, 0.6)) # # Draw or save the plot # plt.tight_layout() if args.save_plots or args.plot_file is not None: if args.plot_file is not None: plot_filename = args.plot_file else: plot_filename = model_file.replace('.csv', '.pdf') plt.savefig(plot_filename, transparent=True) else: plt.show() plt.close(fig)
def get_fitting_data(args, data_handler_joint): biomarkers = data_handler_joint.get_biomarker_names() offsets = range(args.search_range[0], args.search_range[1], args.search_range[2]) errors_file = os.path.join(data_handler_joint.get_eval_folder(), 'offset_errors_{0}.p'.format(args.extrapolator)) if os.path.isfile(errors_file) and not args.recompute_errors: print log.INFO, 'Reading errors estimations from file {0}...'.format(errors_file) (errors, descriminativeness, overlap) = pickle.load(open(errors_file, 'rb')) else: data_handler_1 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='cnmci') data_handler_2 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='mciad') errors = np.zeros((len(biomarkers), len(offsets))) descriminativeness = np.zeros(len(biomarkers)) overlap = [] for i, biomarker in enumerate(biomarkers): # Get error matrix for all biomarkers and offsets model_file_1 = data_handler_1.get_model_file(biomarker) model_file_2 = data_handler_2.get_model_file(biomarker) if os.path.isfile(model_file_1) and os.path.isfile(model_file_2): print log.INFO, 'Analysing {0}...'.format(biomarker) # Get discriminativeness for all biomarkers as a scaling factor eval_file_1 = model_file_1.replace('.csv', '_eval_cover.csv') eval_file_2 = model_file_2.replace('.csv', '_eval_cover.csv') if os.path.isfile(eval_file_1) and os.path.isfile(eval_file_2): descriminate_1 = np.mean(mlab.csv2rec(eval_file_1)['error']) descriminate_2 = np.mean(mlab.csv2rec(eval_file_2)['error']) descriminativeness[i] = 0.5 * (descriminate_1 + descriminate_2) else: print log.WARNING, 'Evaluation file missing for {0}'.format(biomarker) continue # Initialise models model_1 = ProgressionModel(biomarker, model_file_1, extrapolator=args.extrapolator) model_2 = ProgressionModel(biomarker, model_file_2, extrapolator=args.extrapolator) # Assemble errors for each offset min_val_1, max_val_1 = model_1.get_value_range([0.1, 0.9]) min_val_2, max_val_2 = model_2.get_value_range([0.1, 0.9]) values = np.linspace(min(min_val_1, min_val_2), max(max_val_1, max_val_2), 250) values_delta = (values.max() - values.min()) / len(values) for j, offset in enumerate(offsets): dens_11 = np.array(model_1.get_density_distribution(values, offset + model_2.min_progress)) dens_12 = np.array(model_2.get_density_distribution(values, model_2.min_progress)) dens_21 = np.array(model_1.get_density_distribution(values, model_1.max_progress)) dens_22 = np.array(model_2.get_density_distribution(values, -offset + model_1.max_progress)) errors[i, j] = 0.5 * values_delta * (np.sum(np.abs(dens_11 - dens_12)) + np.sum(np.abs(dens_21 - dens_22))) # Get overlap overlap.append(model_1.max_progress - model_2.min_progress) overlap = np.mean(overlap) print log.INFO, 'Saving errors to file {0}...'.format(errors_file) pickle.dump((errors, descriminativeness, overlap), open(errors_file, 'wb')) return biomarkers, offsets, errors, descriminativeness, overlap
def main(): parser = argparse.ArgumentParser() parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename') args = parser.parse_args() # Collect data for test data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12'], biomarkers=biomarkers, select_training_set=True, select_complete=True) # Setup plotting folder eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants') # Process all biomarkers for biomarker in biomarkers: print log.INFO, 'Generating quantile correlation plot for {0}...'.format( biomarker) model_file = data_handler.get_model_file(biomarker) pm = ProgressionModel(biomarker, model_file) q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker)) if os.path.isfile(q_file): (q_bl, q_m12) = pickle.load(open(q_file, 'rb')) else: q_bl = [] q_m12 = [] for rid in measurements: val_bl = measurements[rid]['bl'][biomarker] val_m12 = measurements[rid]['m12'][biomarker] p_bl = measurements[rid]['bl']['progress'] p_m12 = measurements[rid]['m12']['progress'] q_bl.append(pm.approximate_quantile(p_bl, val_bl)) q_m12.append(pm.approximate_quantile(p_m12, val_m12)) pickle.dump((q_bl, q_m12), open(q_file, 'wb')) # Setup plot fig, axs = plt.subplots(1, 2) plt.suptitle('Correlation between bl and m12 quantiles') # Plot 1 ax = axs[0] pt.setup_axes(plt, ax, yspine=True) ax.set_xlabel('Quantile bl') ax.set_ylabel('Quantile m12') ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5) # Plot 2 q_bl = np.array(q_bl) q_m12 = np.array(q_m12) errors = q_bl - q_m12 loc, scale = norm.fit(errors, floc=0.0) ax = axs[1] pt.setup_axes(plt, ax) ax.set_xlabel('Difference bl to m12') ax.set_ylabel('Probability') ax.set_xlim(-1.05, 1.05) ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3) x = np.linspace(-1.0, 1.0, 100) ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k') # Draw or save the plot plt.tight_layout() if args.save_plots: plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker)) plt.savefig(plot_file, transparent=True) else: plt.show() plt.close(fig)
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format(predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format(predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit]['scantime'] progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile(progress, value) mean_quantile /= len(visits) value_model = model.get_value_at_quantile(progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file) pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model
def plot_biomarker(data_handler, biomarker, measurements, dpi, dpr): """ Plot the model of one biomarker with the fitted values :param data_handler: the data handler :param biomarker: the biomarker to plot :param measurements: the measurements containing the biomarker samples of one subject :param dpi: the estimated DPI :param dpr: the estimated DPR """ model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) # # Read model # pm = ProgressionModel(biomarker, model_file) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 fig = plt.figure(figsize=(figure_width, 5)) ax1 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) ax1.set_title('Model for {0} with fitted sample values'.format(biomarker_string)) ax1.set_xlabel('Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot the percentile curves of the fitted model # ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, color=grey_value) curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) label = 'q = {0}'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 100, curve_int[-1], label, fontsize=10) # # Plot points # progr_points = [] value_points = [] diagn_points = [] for visit in measurements[0]: if biomarker in measurements[0][visit]: progress = measurements[0][visit]['scantime'] * dpr + dpi value = measurements[0][visit][biomarker] progr_points.append(progress) value_points.append(value) diagn_points.append(1.0) ax1.axvline(progress, color='b', linestyle='--') ax1.text(progress + 150, value, visit, color='b', fontsize=10) ax1.scatter(progr_points, value_points, s=25.0, color='b', edgecolor='none', vmin=0.0, vmax=1.0, alpha=0.9) # # Draw or save the plot # plt.tight_layout() plt.show() plt.close(fig)
def plot_model(args, data_handler, biomarker): model_file = data_handler.get_model_file(biomarker) if not os.path.isfile(model_file): print log.ERROR, 'Model file not found: {0}'.format(model_file) return print log.INFO, 'Generating plot for {0}...'.format(biomarker) plot_synth_model = args.plot_synth_model and biomarker in SynthModel.get_biomarker_names() # # Read model # pm = ProgressionModel(biomarker, model_file, extrapolator=args.extrapolator) progress_extrapolate = 0.3 * (pm.max_progress - pm.min_progress) min_progress_extrapolate = int(pm.min_progress - progress_extrapolate) max_progress_extrapolate = int(pm.max_progress + progress_extrapolate) progress_linspace_ex1 = np.linspace(min_progress_extrapolate, pm.min_progress, 20) progress_linspace_int = np.linspace(pm.min_progress, pm.max_progress, 60) progress_linspace_ex2 = np.linspace(pm.max_progress, max_progress_extrapolate, 20) # Calc min and max val in interval between 1% and 99% percentie min_val, max_val = pm.get_value_range([0.1, 0.9]) # progress_linspace = np.linspace(min_progress_extrapolate, max_progress_extrapolate, 100) # min_val = float('inf') # max_val = float('-inf') # for quantile in [0.1, 0.9]: # curve = pm.get_quantile_curve(progress_linspace, quantile) # min_val = min(min_val, np.min(curve)) # max_val = max(max_val, np.max(curve)) # # Setup plot # biomarker_string = pt.get_biomarker_string(biomarker) figure_width = 6 if args.no_densities or args.only_densities else 12 fig = plt.figure(figsize=(figure_width, 5)) if args.only_densities: ax1 = None ax2 = plt.subplot(1, 1, 1) pt.setup_axes(plt, ax2, xgrid=False, ygrid=False) elif args.no_densities: ax1 = plt.subplot(1, 1, 1) ax2 = None pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) else: ax1 = plt.subplot(1, 2, 1) ax2 = plt.subplot(1, 2, 2) pt.setup_axes(plt, ax1, xgrid=False, ygrid=False) pt.setup_axes(plt, ax2) if not args.only_densities: if args.no_model and not args.plot_synth_model: ax1.set_title('Aligned samples for {0}'.format(biomarker_string)) else: ax1.set_title('Quantile curves for {0}'.format(biomarker_string)) if args.phase == 'mciad': ax1.set_xlabel('Disease progress (days before/after conversion to AD)') else: ax1.set_xlabel('Disease progress (days before/after conversion to MCI)') ax1.set_ylabel(DataHandler.get_biomarker_unit(biomarker)) if args.xlim is not None: ax1.set_xlim(args.xlim[0], args.xlim[1]) else: ax1.set_xlim(min_progress_extrapolate, max_progress_extrapolate) if args.ylim is not None: ax1.set_ylim(args.ylim[0], args.ylim[1]) # # Plot the percentile curves of the fitted model # if not args.no_model and not args.only_densities: ax1.axvline(pm.min_progress, color='0.15', linestyle=':') ax1.axvline(pm.max_progress, color='0.15', linestyle=':') quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] grey_values = ['0.4', '0.2', '0', '0.2', '0.4'] for grey_value, quantile in zip(grey_values, quantiles): curve_int = pm.get_quantile_curve(progress_linspace_int, quantile) ax1.plot(progress_linspace_int, curve_int, color=grey_value) if not args.no_extrapolation: curve_ex1 = pm.get_quantile_curve(progress_linspace_ex1, quantile) curve_ex2 = pm.get_quantile_curve(progress_linspace_ex2, quantile) ax1.plot(progress_linspace_ex1, curve_ex1, '--', color=grey_value) ax1.plot(progress_linspace_ex2, curve_ex2, '--', color=grey_value) if args.plot_quantile_label: label = '$q={0}\%$'.format(quantile * 100) ax1.text(progress_linspace_int[-1] + 10, curve_int[-1], label, fontsize=10) if args.plot_donohue: print 'Plotting Donohue' donohue_file = os.path.join(data_handler._conf.models_folder, 'donohue', 'population_{0}.csv'.format(biomarker.replace(' ', '.'))) if not os.path.isfile(donohue_file): print log.ERROR, 'Donohue model file not found: {0}'.format(donohue_file) return r = mlab.csv2rec(donohue_file) if args.method == 'joint': offset = 2200 else: offset = 300 progrs = r[r.dtype.names[0]] * 30.44 + offset vals = r[r.dtype.names[1]] curve_donohue = [] progr_donohue = [] for p in progress_linspace_int: if progrs[0] < p < progrs[-1]: i = 1 while p > progrs[i]: i += 1 # TODO linear interpolation progr_donohue.append(progrs[i]) curve_donohue.append(vals[i]) ax1.plot(progr_donohue, curve_donohue, '--', color='b', linewidth=2) # # Plot synthetic model curve # if plot_synth_model: progress_linspace_synth = np.linspace(-2500, 2500, 100) quantiles = [0.1, 0.25, 0.5, 0.75, 0.9] alphas = [0.4, 0.7, 1.0, 0.7, 0.4] for quantile, alpha in zip(quantiles, alphas): curve_synth = [SynthModel.get_distributed_value(biomarker, p, cdf=quantile) for p in progress_linspace_synth] ax1.plot(progress_linspace_synth, curve_synth, color='b', alpha=alpha) # # Plot predictor function # if args.plot_eta is not None and not args.only_densities: # Get second axis of plot 1 ax1b = ax1.twinx() # Plot all progresses # ax1b.scatter(pm.all_progresses, pm.all_mus, facecolor='b', marker='o', edgecolor='none', alpha=0.2) ax1b.text(pm.progresses[-1], pm.sigmas[-1], '$\mu$', color='b', fontsize=11) # Plot binned progresses ax1b.scatter(pm.progresses, pm.sigmas, color='b', marker='x') # Plot interpolated model mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_int] ax1b.plot(progress_linspace_int, mus, color='b') if not args.no_extrapolation: mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex1] ax1b.plot(progress_linspace_ex1, mus, '--', color='b') mus = [pm.get_eta(pm.sigmas, p) for p in progress_linspace_ex2] ax1b.plot(progress_linspace_ex2, mus, '--', color='b') if args.xlim is not None: ax1b.set_xlim(args.xlim[0], args.xlim[1]) else: ax1b.set_xlim(min_progress_extrapolate, max_progress_extrapolate) # # Plot errors # if args.plot_errors and not args.only_densities: eval_file = model_file.replace('.csv', '_eval_cover.csv') if not os.path.isfile(eval_file): print log.ERROR, 'Evaluation file not found: {0}'.format(eval_file) else: m = mlab.csv2rec(eval_file) progresses = m['progress'] errors = m['error'] # Get second axis of plot 1 ax1b = ax1.twinx() # ax1b.set_ylim(0, max(150, 1.2 * np.max(errors))) ax1b.plot(progresses, errors, color='g', marker='x') ax1b.text(progresses[-1], errors[-1], 'Discr.', color='g', fontsize=11) ax1b.axhline(np.mean(errors), color='g', linestyle='--', alpha=0.5) median_curve = pm.get_quantile_curve(progresses, 0.5) min_value = np.min(median_curve) max_value = np.max(median_curve) rect = mpl.patches.Rectangle((progresses[0], min_value), progresses[-1] - progresses[0], max_value - min_value, fc=(0.0, 0.5, 0.0, 0.1), ec=(0.0, 0.5, 0.0, 0.8), linewidth=1) ax1.add_patch(rect) # # Plot points # if not args.no_points and not args.only_densities: samples_file = data_handler.get_samples_file(biomarker) if not os.path.isfile(samples_file): print log.ERROR, 'Samples file not found: {0}'.format(samples_file) else: m = mlab.csv2rec(samples_file) progr_points = m['progress'] value_points = m['value'] # diagn_points = [0.5 if p < 0 else 1.0 for p in progr_points] diagn_points = m['diagnosis'] diagn_points[(0.25 <= diagn_points) & (diagn_points <= 0.75)] = 0.5 print log.INFO, 'Plotting {0} sample points...'.format(len(progr_points)) ax1.scatter(progr_points, value_points, s=15.0, c=diagn_points, edgecolor='none', vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=args.points_alpha) if args.phase == 'cnmci': rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha,), linewidth=0), mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0)] labels = ['CN', 'MCI'] elif args.phase == 'mciad': rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0), mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha,), linewidth=0)] labels = ['MCI', 'AD'] else: rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (args.points_alpha,), linewidth=0), mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (args.points_alpha,), linewidth=0), mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (args.points_alpha,), linewidth=0)] labels = ['CN', 'MCI', 'AD'] legend = ax1.legend(rects, labels, fontsize=10, ncol=len(rects), loc='upper center', framealpha=0.9) legend.get_frame().set_edgecolor((0.6, 0.6, 0.6)) # # Plot PDFs # progr_samples = [-2000, -1000, 0, 1000, 2000, 3000, 4000] if args.phase == 'joint' else \ [-2000, -1500, -1000, -500, 0, 500, 1000, 1500, 2000] if args.phase == 'cnmci': vmin = -2000 vmax = 6000 elif args.phase == 'mciad': vmin = -6000 vmax = 2000 elif args.phase == 'joint': vmin = -2000 vmax = 4000 sample_cmap = cmx.ScalarMappable( norm=colors.Normalize(vmin=vmin, vmax=vmax), cmap=plt.get_cmap(pt.progression_cmap)) if not args.no_sample_lines and not args.only_densities: for progr in progr_samples: if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress: # sample_color = sample_cmap.to_rgba(progr_samples.index(progr)) sample_color = sample_cmap.to_rgba(progr) linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-' ax1.axvline(progr, color=sample_color, linestyle=linestyle, alpha=0.3) if not args.no_densities: ax2.set_title('Probability density function for {0}'.format(biomarker_string)) ax2.set_xlabel(DataHandler.get_biomarker_unit(biomarker)) ax2.set_ylabel('Probability') if args.ylim is None: values = np.linspace(min_val, max_val, 250) ax2.set_xlim(min_val, max_val) else: values = np.linspace(args.ylim[0], args.ylim[1], 250) ax2.set_xlim(args.ylim[0], args.ylim[1]) for progr in progr_samples: if not args.no_extrapolation or pm.min_progress < progr < pm.max_progress: # sample_color = sample_cmap.to_rgba(progr_samples.index(progr)) sample_color = sample_cmap.to_rgba(progr) linestyle = '--' if progr < pm.min_progress or progr > pm.max_progress else '-' probs = pm.get_density_distribution(values, progr) ax2.plot(values, probs, label=str(progr), color=sample_color, linestyle=linestyle) if plot_synth_model: probs = [SynthModel.get_probability(biomarker, progr, v) for v in values] ax2.plot(values, probs, color='b', linestyle='--') legend = ax2.legend(fontsize=10, loc='best', framealpha=0.9) legend.get_frame().set_edgecolor((0.6, 0.6, 0.6)) # # Draw or save the plot # plt.tight_layout() if args.save_plots or args.plot_file is not None: if args.plot_file is not None: plot_filename = args.plot_file else: plot_filename = model_file.replace('.csv', '.pdf') plt.savefig(plot_filename, transparent=True) else: plt.show() plt.close(fig)
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format( predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format( predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format( predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format( predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates( visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict( visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel( predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format( predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit][ 'scantime'] progress_next_visit = ModelFitter.scantime_to_progress( scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][ predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile( progress, value) mean_quantile /= len(visits) value_model = model.get_value_at_quantile( progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][ predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format( predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format( predict_biomarker, prediction_file) pickle.dump( (rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model