def select_converters(args, rids, diagnoses, dpis, dprs):
    '''
    Select data from subjects that convert within 2 years from MCI to AD.

    A subject is treated as a converter if its 'DX.scan' value at 'bl' lies
    in [0.25, 0.75] and its 'DX.scan' value at 'm24' equals 1.0. Candidates
    are the subjects the data handler returns for the visits bl, m12 and m24
    (training set, complete visits only, no regression).

    :param args: parsed arguments; only args.method is read here
    :param rids: subject IDs; diagnoses, dpis and dprs are indexed in parallel
    :param diagnoses: per-subject entries aligned with rids
    :param dpis: per-subject entries aligned with rids
    :param dprs: per-subject entries aligned with rids
    :return: four lists (rids, diagnoses, dpis, dprs) holding only the
        entries of converting subjects, in their original order
    '''
    handler = DataHandler.get_data_handler(method=args.method)
    visit_data = handler.get_measurements_as_dict(
        visits=['bl', 'm12', 'm24'],
        no_regression=True,
        select_training_set=True,
        select_complete=True)

    def is_converter(subject):
        # MCI at baseline and AD two years later
        return (0.25 <= subject['bl']['DX.scan'] <= 0.75
                and subject['m24']['DX.scan'] == 1.0)

    converter_ids = set(rid for rid in visit_data
                        if is_converter(visit_data[rid]))

    # Keep the aligned entries of every selected subject together
    kept = [(rid, diagnoses[idx], dpis[idx], dprs[idx])
            for idx, rid in enumerate(rids) if rid in converter_ids]
    selected_rids = [entry[0] for entry in kept]
    selected_diagnoses = [entry[1] for entry in kept]
    selected_dpis = [entry[2] for entry in kept]
    selected_dprs = [entry[3] for entry in kept]

    message = 'Selected {0} converting subjects.'.format(len(selected_rids))
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT, message))
    return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def select_nonconverters(args, rids, diagnoses, dpis, dprs):
    '''
    Select data from MCI subjects that do not convert.

    A subject is treated as a non-converter if its 'DX.scan' value lies in
    [0.25, 0.75] both at 'bl' and at 'm24'. Candidates are the subjects the
    data handler returns for the visits bl, m12 and m24 (test set, complete
    visits only, no regression).

    :param args: parsed arguments; only args.method is read here
    :param rids: subject IDs; diagnoses, dpis and dprs are indexed in parallel
    :param diagnoses: per-subject entries aligned with rids
    :param dpis: per-subject entries aligned with rids
    :param dprs: per-subject entries aligned with rids
    :return: four lists (rids, diagnoses, dpis, dprs) holding only the
        entries of non-converting subjects, in their original order
    '''
    handler = DataHandler.get_data_handler(method=args.method)
    visit_data = handler.get_measurements_as_dict(
        visits=['bl', 'm12', 'm24'],
        no_regression=True,
        select_test_set=True,
        select_complete=True)

    def stays_mci(subject):
        # MCI at baseline and still MCI two years later
        return (0.25 <= subject['bl']['DX.scan'] <= 0.75
                and 0.25 <= subject['m24']['DX.scan'] <= 0.75)

    nonconverter_ids = set(rid for rid in visit_data
                           if stays_mci(visit_data[rid]))

    # Keep the aligned entries of every selected subject together
    kept = [(rid, diagnoses[idx], dpis[idx], dprs[idx])
            for idx, rid in enumerate(rids) if rid in nonconverter_ids]
    selected_rids = [entry[0] for entry in kept]
    selected_diagnoses = [entry[1] for entry in kept]
    selected_dpis = [entry[2] for entry in kept]
    selected_dprs = [entry[3] for entry in kept]

    message = 'Selected {0} non-converting subjects.'.format(len(selected_rids))
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT, message))
    return selected_rids, selected_diagnoses, selected_dpis, selected_dprs
def main():
    '''
    Fit the joint multi-biomarker model to samples from a csv file and plot them.

    Reads the measurements and biomarker names from args.samples_file, adds
    one model per biomarker to a MultiBiomarkerProgressionModel, estimates
    DPIs (and DPRs when --estimate_dpr is given, otherwise DPRs of 1.0 are
    used) and plots every biomarker model together with the samples, using
    the first estimated DPI and DPR.
    '''
    # Parse input arguments
    parser = argparse.ArgumentParser()
    # The help texts used to be an identical copy-paste that described
    # neither option; they now state what each option does in this function.
    parser.add_argument('--estimate_dpr', action='store_true',
                        help='estimate the dprs in addition to the dpis')
    parser.add_argument('--samples_file', type=str, default='measurements_sample.csv',
                        help='csv file containing the measurement samples')
    args = parser.parse_args()

    # Read the measurements as dict from the csv file
    measurements, biomarkers = read_measurements_from_cvs(args.samples_file)
    visits = measurements[0].keys()

    # Get estimates
    data_handler = DataHandler.get_data_handler(method='all',
                                                biomarkers=biomarkers,
                                                phase='joint')

    # Setup model
    model = MultiBiomarkerProgressionModel()
    for biomarker in biomarkers:
        model_file = data_handler.get_model_file(biomarker)
        model.add_model(biomarker, model_file)
    fitter = ModelFitter(model)

    # Estimate dpis (and dprs); rids and diagnoses are returned but not needed here
    if args.estimate_dpr:
        rids, diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase='joint')
    else:
        rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase='joint')
        # Without a DPR estimation every sample gets a DPR of 1.0
        dprs = np.ones(len(dpis)).tolist()

    # Plot the models with the fitted samples
    for biomarker in biomarkers:
        plot_biomarker(data_handler, biomarker, measurements, dpis[0], dprs[0])
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv, dprs_nonconv):
    '''
    Cross-validate the separation of converters and non-converters.

    Converters are labelled 1 and non-converters 0. The feature matrix has two
    columns: the DPIs and, if args.estimate_dprs is set, the DPRs (otherwise
    the DPIs again). The scaled features are passed to run_classification and
    the resulting accuracy, sensitivity and specificity are printed. If
    args.latex_file is not None, a table row is appended to that file in the
    evaluation folder.

    :param args: parsed arguments (estimate_dprs, num_folds, latex_file,
        method, biomarkers, phase and visits are read)
    :param dpis_conv: DPIs of the converters
    :param dprs_conv: DPRs of the converters
    :param dpis_nonconv: DPIs of the non-converters
    :param dprs_nonconv: DPRs of the non-converters
    '''
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.INFO, 'Analysing classification accuracies...'))

    all_dpis = np.concatenate((dpis_conv, dpis_nonconv))
    all_dprs = np.concatenate((dprs_conv, dprs_nonconv))
    labels = np.concatenate((np.ones(len(dpis_conv)),
                             np.zeros(len(dpis_nonconv))))

    # Assemble features; LDA needs two of them, so the DPIs are duplicated
    # when no DPRs were estimated
    features = np.zeros((len(all_dpis), 2))
    features[:, 0] = all_dpis
    features[:, 1] = all_dprs if args.estimate_dprs else all_dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    summary = '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)
    print('%s %s' % (log.RESULT, summary))

    if args.latex_file is None:
        return

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
    print('%s %s' % (log.INFO, 'Writing classification results to {0}...'.format(filename)))
    row = '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
        args.method, len(args.visits), acc, sens, spec)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Parse the plotting options and plot the model of every selected biomarker.

    A data handler is created from the method, biomarkers and phase options;
    plot_model is then called once per biomarker name of that handler with
    the full set of parsed arguments.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--xlim', type=float, nargs=2, default=None,
                        help='force certain x limits for plotting')
    parser.add_argument('--ylim', type=float, nargs=2, default=None,
                        help='force certain y limits for plotting')
    parser.add_argument('--no_model', action='store_true', default=False,
                        help='do not plot the fitted model')
    parser.add_argument('--no_points', action='store_true', default=False,
                        help='do not plot points')
    parser.add_argument('--points_alpha', type=float, default=0.25,
                        help='alpha value of the plotted points')
    parser.add_argument('--no_densities', action='store_true', default=False,
                        help='do not plot densities')
    parser.add_argument('--no_sample_lines', action='store_true', default=False,
                        help='do not plot the sample lines')
    parser.add_argument('--only_densities', action='store_true', default=False,
                        help='only plot densities')
    parser.add_argument('--no_extrapolation', action='store_true', default=False,
                        help='do not extrapolate the model')
    parser.add_argument('--plot_eta', type=str, choices=['lambda', 'mu', 'sigma'], default=None,
                        help='plot a predictor function')
    parser.add_argument('--plot_errors', action='store_true', default=False,
                        help='plot the errors')
    parser.add_argument('--plot_synth_model', action='store_true', default=False,
                        help='plot density distributions for synthetic data')
    # Help text typo fixed ("curces" -> "curves")
    parser.add_argument('--plot_quantile_label', action='store_true', default=False,
                        help='plot labels on the quantile curves')
    parser.add_argument('--plot_donohue', action='store_true', default=False,
                        help='plot the trajectory estimated with Donohue et al.')
    parser.add_argument('--save_plots', action='store_true', default=False,
                        help='save the plots with a default filename')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
def classify_converters(args, dpis_conv, dprs_conv, dpis_nonconv, dprs_nonconv):
    '''
    Cross-validate the separation of converters and non-converters.

    Converters are labelled 1 and non-converters 0. The feature matrix has two
    columns: the DPIs and, if args.estimate_dprs is set, the DPRs (otherwise
    the DPIs again). The scaled features are passed to run_classification and
    the resulting accuracy, sensitivity and specificity are printed. If
    args.latex_file is not None, a table row is appended to that file in the
    evaluation folder.

    :param args: parsed arguments (estimate_dprs, num_folds, latex_file,
        method, biomarkers, phase and visits are read)
    :param dpis_conv: DPIs of the converters
    :param dprs_conv: DPRs of the converters
    :param dpis_nonconv: DPIs of the non-converters
    :param dprs_nonconv: DPRs of the non-converters
    '''
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.INFO, 'Analysing classification accuracies...'))

    all_dpis = np.concatenate((dpis_conv, dpis_nonconv))
    all_dprs = np.concatenate((dprs_conv, dprs_nonconv))
    labels = np.concatenate((np.ones(len(dpis_conv)),
                             np.zeros(len(dpis_nonconv))))

    # Assemble features; LDA needs two of them, so the DPIs are duplicated
    # when no DPRs were estimated
    features = np.zeros((len(all_dpis), 2))
    features[:, 0] = all_dpis
    features[:, 1] = all_dprs if args.estimate_dprs else all_dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    summary = '{0}-fold cross validation, converters vs. non-converters ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)
    print('%s %s' % (log.RESULT, summary))

    if args.latex_file is None:
        return

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
    print('%s %s' % (log.INFO, 'Writing classification results to {0}...'.format(filename)))
    row = '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
        args.method, len(args.visits), acc, sens, spec)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Print and plot the model differences of all biomarkers over a range of offsets.

    For every biomarker, get_model_differences is evaluated on a grid of
    offsets and the offset with the smallest difference is printed. The offset
    minimising the mean difference over all biomarkers is printed as 'all',
    followed by the biomarkers sorted by their difference at that offset.
    Finally the per-biomarker curves (red) and their mean (blue) are shown.
    '''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                            help='the method to collect data for')
    arg_parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
                            help='the phase for which the model is to be trained')
    arg_parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                            help='the type of extrapolator')
    args = arg_parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()

    # NOTE(review): 'joint' is compared against args.method here, while the
    # name looks like a phase rather than a method - confirm this is intended.
    if args.method == 'joint':
        offsets = np.linspace(500, 3000, 26)
    else:
        offsets = np.linspace(-1000, 1000, 21)

    # One column of differences per biomarker, one row per offset
    all_diffs = np.zeros((len(offsets), len(biomarkers)))
    for column, biomarker in enumerate(biomarkers):
        diffs = get_model_differences(args, data_handler, biomarker, offsets)
        all_diffs[:, column] = diffs
        # Single-argument print() emits the same text under Python 2 and 3
        print('%s %s' % (biomarker, offsets[np.argmin(diffs)]))

    best_row = np.argmin(np.mean(all_diffs, axis=1))
    print('%s %s' % ('all', offsets[best_row]))

    # Rank the biomarkers by their difference at the jointly optimal offset
    diffs_at_best = all_diffs[best_row, :]
    for position in np.argsort(diffs_at_best):
        print('%s %s' % (biomarkers[position], diffs_at_best[position]))

    fig = plt.figure()
    axes = plt.subplot(1, 1, 1)
    axes.plot(offsets, all_diffs, color='r')
    axes.plot(offsets, np.mean(all_diffs, axis=1), color='b')
    plt.show()
    plt.close(fig)
def print_to_latex(args, results_naive, results_model, num_subjects):
    '''
    Append one LaTeX table row with naive and model results to the latex file.

    The row holds the predicted biomarker, the method with the number of
    visits, mean +- std and correlation of the naive results, the same for the
    model results, and the number of subjects.

    :param args: parsed arguments (method, biomarkers, phase, latex_file,
        predict_biomarker and visits are read)
    :param results_naive: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param results_model: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param num_subjects: number written into the last column
    '''
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)

    # '\\pm' is spelled with an escaped backslash: the former '\p' is an
    # invalid escape sequence that newer Python versions warn about. The
    # written text is unchanged.
    row_template = ('{0} & {1} {2} & ${3:.2f}\\pm{4:.2f}$ & ${5:.2f}$ & '
                    '${6:.2f}\\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n')
    row = row_template.format(args.predict_biomarker,
                              args.method,
                              len(args.visits),
                              results_naive['MEAN'],
                              results_naive['STD'],
                              results_naive['CORR'],
                              results_model['MEAN'],
                              results_model['STD'],
                              results_model['CORR'],
                              num_subjects)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Plot the fitting errors between the CN/MCI and MCI/AD models over the offset.

    The errors of every biomarker whose discriminativeness exceeds
    --plot_threshold are plotted as dashed lines, together with the mean and
    the discriminativeness-weighted mean over all biomarkers. The offsets
    minimising the two means are printed and marked by vertical lines, as is
    the overlap returned by get_fitting_data.
    '''
    parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--plot_threshold', type=float, default=0.3,
                        help='the threshold above which graphs are plotted')
    parser.add_argument('--recompute_errors', action='store_true',
                        help='recompute the matrix containing the fitting errors')
    # type=int is required: without it, values given on the command line are
    # strings and range() in get_fitting_data raises a TypeError.
    parser.add_argument('--search_range', type=int, nargs=3, default=(1000, 5000, 10),
                        help='the range in which the offset is sought')
    args = parser.parse_args()

    # Get the data files and biomarkers
    data_handler_joint = DataHandler.get_data_handler(method=args.method,
                                                      biomarkers=args.biomarkers,
                                                      phase='joint')
    biomarkers, offsets, errors, descriminativeness, overlap = get_fitting_data(args, data_handler_joint)

    # Plot single biomarker fits
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax, xgrid=False)
    ax.set_title('Optimal offset between CN/MCI and MCI/AD models')
    ax.set_xlabel('Offset (days)')
    ax.set_ylabel('Fitting error')
    for i, biomarker in enumerate(biomarkers):
        if descriminativeness[i] > args.plot_threshold:
            message = 'Min error for {0} at {1}'.format(biomarker, offsets[np.argmin(errors[i, :])])
            # Single-argument print() emits the same text under Python 2 and 3
            print('%s %s' % (log.RESULT, message))
            ax.plot(offsets, errors[i, :], label=biomarker, linestyle='--')

    # Get mean errors over all biomarkers, plain and weighted by discriminativeness
    mean_errors = np.mean(errors, 0)
    weighted_mean_errors = np.dot(errors.T, descriminativeness) / np.sum(descriminativeness)

    # Plot joint fit
    ax.plot(offsets, mean_errors, label='Mean', linewidth=2, color='g')
    ax.plot(offsets, weighted_mean_errors, label='Weighted mean', linewidth=2, color='r')

    # Get and plot optimal offset
    optimal_offset = offsets[np.argmin(mean_errors)]
    optimal_offset_weighted = offsets[np.argmin(weighted_mean_errors)]
    print('%s %s' % (log.RESULT, 'Optimal threshold: {0}'.format(optimal_offset)))
    print('%s %s' % (log.RESULT, 'Optimal threshold (weighted): {0}'.format(optimal_offset_weighted)))
    ax.axvline(optimal_offset, linestyle=':', color='g')
    ax.axvline(optimal_offset_weighted, linestyle=':', color='r')

    # Plot overlap
    ax.axvline(overlap, color='0.15', linestyle=':')

    ax.legend()
    plt.show()
    plt.close(fig)
def analyse_decline(args, rids, dpis, dprs, rds, non_rds):
    '''
    Cross-validate the separation of subjects with and without rapid decline.

    Subjects whose ID is in rds are labelled 1, subjects whose ID is in
    non_rds (and not in rds) are labelled 0; all others are ignored. The
    feature matrix has two columns: the DPIs and, if args.estimate_dprs is
    set, the DPRs (otherwise the DPIs again). The scaled features are passed
    to run_classification and the results are printed. If args.latex_file is
    not None, a table row is appended to that file in the evaluation folder.

    :param args: parsed arguments (estimate_dprs, num_folds, latex_file,
        method, biomarkers, phase and visits are read)
    :param rids: subject IDs, aligned with dpis and dprs
    :param dpis: per-subject DPIs
    :param dprs: per-subject DPRs
    :param rds: collection of IDs with rapid decline
    :param non_rds: collection of IDs without rapid decline
    '''
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.INFO, 'Analysing classification accuracies...'))

    # Split the (dpi, dpr) pairs into the two groups
    rd_pairs = []
    nonrd_pairs = []
    for rid, dpi, dpr in zip(rids, dpis, dprs):
        if rid in rds:
            rd_pairs.append((dpi, dpr))
        elif rid in non_rds:
            nonrd_pairs.append((dpi, dpr))
    dpis_rds = [pair[0] for pair in rd_pairs]
    dprs_rds = [pair[1] for pair in rd_pairs]
    dpis_nonrds = [pair[0] for pair in nonrd_pairs]
    dprs_nonrds = [pair[1] for pair in nonrd_pairs]

    all_dpis = np.concatenate((dpis_rds, dpis_nonrds))
    all_dprs = np.concatenate((dprs_rds, dprs_nonrds))
    labels = np.concatenate((np.ones(len(dpis_rds)),
                             np.zeros(len(dpis_nonrds))))

    # Assemble features; LDA needs two of them, so the DPIs are duplicated
    # when no DPRs were estimated
    features = np.zeros((len(all_dpis), 2))
    features[:, 0] = all_dpis
    features[:, 1] = all_dprs if args.estimate_dprs else all_dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    summary = '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)
    print('%s %s' % (log.RESULT, summary))

    if args.latex_file is None:
        return

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
    print('%s %s' % (log.INFO, 'Writing classification results to {0}...'.format(filename)))
    row = '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
        args.method, len(args.visits), acc, sens, spec)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Compute the mean biomarker change between bl and m12 per diagnosis group.

    For every biomarker, the change of each subject is the value difference
    between 'm12' and 'bl' divided by the difference of the 'scantime'
    entries. The changes are averaged per baseline 'DX.scan' value (0.0, 0.25,
    0.75 and 1.0, printed as CN, EMCI, LMCI and AD) and additionally over the
    pooled 0.25/0.75/1.0 groups, which is stored under the key 0.66. Groups
    without any subject get NaN instead of raising a ZeroDivisionError. The
    result is pickled to 'mean_changes.p' in the evaluation folder.
    '''
    def mean_or_nan(total, count):
        # The mean of an empty group is undefined
        return total / count if count else float('nan')

    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'],
            biomarkers=[biomarker],
            select_complete=True)

        # Sum of changes and number of subjects per baseline diagnosis
        summed_changes = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']

            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            summed_changes[diagnosis] += change
            num_subjects[diagnosis] += 1

        # Pool the EMCI, LMCI and AD groups before the sums are turned into means
        mean_change_mci_ad = summed_changes[0.25] + summed_changes[0.75] + summed_changes[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[0.75] + num_subjects[1.0]

        mean_changes_biomarker = dict(
            (diagnosis, mean_or_nan(summed_changes[diagnosis], num_subjects[diagnosis]))
            for diagnosis in summed_changes)
        mean_changes_biomarker[0.66] = mean_or_nan(mean_change_mci_ad, num_subjects_mci_ad)
        mean_changes[biomarker] = mean_changes_biomarker

        # Single-argument print() emits the same text under Python 2 and 3
        print('%s %s' % (log.RESULT, '{0} CN: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])))
        print('%s %s' % (log.RESULT, '{0} EMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])))
        print('%s %s' % (log.RESULT, '{0} LMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])))
        print('%s %s' % (log.RESULT, '{0} AD: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])))

    mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
    # The context manager closes (and thereby flushes) the file deterministically
    with open(mean_changes_file, 'wb') as changes_handle:
        pickle.dump(mean_changes, changes_handle)
def main():
    '''
    Scatter-plot two biomarkers against each other, coloured by diagnosis.

    All visits of all subjects with complete measurements are plotted.
    'DX.scan' values in [0.25, 0.75] are merged to 0.5 so that the colours
    fall into the three groups of the legend (CN, MCI, AD). The plot is saved
    to --plot_file if given and shown otherwise.
    '''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('--plot_file', type=str, default=None,
                            help='filename of the output file')
    args = arg_parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(biomarkers=biomarkers,
                                                         select_complete=True)

    # Collect the values of both biomarkers and the diagnosis of every visit
    name_x = biomarkers[0]
    name_y = biomarkers[1]
    values_x = []
    values_y = []
    diagnoses = []
    for rid in measurements:
        for visit in measurements[rid]:
            sample = measurements[rid][visit]
            values_x.append(sample[name_x])
            values_y.append(sample[name_y])
            diagnoses.append(sample['DX.scan'])

    # Merge all MCI stages into one colour value
    diagnoses = np.array(diagnoses)
    is_mci = (0.25 <= diagnoses) & (diagnoses <= 0.75)
    diagnoses[is_mci] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    ax.scatter(values_x, values_y, s=15.0, c=diagnoses, edgecolor='none',
               vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.25)
    ax.set_xlabel(name_x)
    ax.set_ylabel(name_y)

    # Plot legend with one semi-transparent patch per diagnosis group
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=colour + (0.25,), linewidth=0)
             for colour in (pt.color_cn, pt.color_mci, pt.color_ad)]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def print_to_latex(args, results_naive, results_model, num_subjects):
    '''
    Append one LaTeX table row with naive and model results to the latex file.

    The row holds the predicted biomarker, the method with the number of
    visits, mean +- std and correlation of the naive results, the same for the
    model results, and the number of subjects.

    :param args: parsed arguments (method, biomarkers, phase, latex_file,
        predict_biomarker and visits are read)
    :param results_naive: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param results_model: dict with the keys 'MEAN', 'STD' and 'CORR'
    :param num_subjects: number written into the last column
    '''
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)

    # '\\pm' is spelled with an escaped backslash: the former '\p' is an
    # invalid escape sequence that newer Python versions warn about. The
    # written text is unchanged.
    row_template = ('{0} & {1} {2} & ${3:.2f}\\pm{4:.2f}$ & ${5:.2f}$ & '
                    '${6:.2f}\\pm{7:.2f}$ & ${8:.2f}$ & {9}\\\\\n')
    row = row_template.format(args.predict_biomarker,
                              args.method,
                              len(args.visits),
                              results_naive['MEAN'],
                              results_naive['STD'],
                              results_naive['CORR'],
                              results_model['MEAN'],
                              results_model['STD'],
                              results_model['CORR'],
                              num_subjects)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Parse the command line and print the terminal decline statistics.

    A data handler is created from the method, biomarkers and phase options
    and passed, together with all parsed arguments, to
    print_terminal_decline_statistics.
    '''
    arg_parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    arg_parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                            help='the method to collect data for')
    arg_parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                            help='the phase for which the model is to be trained')
    arg_parser.add_argument('-n', '--nr_threads', type=int, default=1,
                            help='number of threads')
    arg_parser.add_argument('--min_visits', type=int, default=0,
                            help='the minimal number of visits')
    arg_parser.add_argument('--no_regression', action='store_true', default=False,
                            help='do not perform age regression of biomarker values')
    arg_parser.add_argument('--recompute_models', action='store_true',
                            help='recompute the models with new samples')
    args = arg_parser.parse_args()

    # Get the data files and biomarkers
    handler = DataHandler.get_data_handler(method=args.method,
                                           biomarkers=args.biomarkers,
                                           phase=args.phase)

    # Only the terminal decline statistics are active; the csv generation and
    # the gender statistics were disabled in the original code.
    print_terminal_decline_statistics(args, handler)
def analyse_decline(args, rids, dpis, dprs, rds, non_rds):
    '''
    Cross-validate the separation of subjects with and without rapid decline.

    Subjects whose ID is in rds are labelled 1, subjects whose ID is in
    non_rds (and not in rds) are labelled 0; all others are ignored. The
    feature matrix has two columns: the DPIs and, if args.estimate_dprs is
    set, the DPRs (otherwise the DPIs again). The scaled features are passed
    to run_classification and the results are printed. If args.latex_file is
    not None, a table row is appended to that file in the evaluation folder.

    :param args: parsed arguments (estimate_dprs, num_folds, latex_file,
        method, biomarkers, phase and visits are read)
    :param rids: subject IDs, aligned with dpis and dprs
    :param dpis: per-subject DPIs
    :param dprs: per-subject DPRs
    :param rds: collection of IDs with rapid decline
    :param non_rds: collection of IDs without rapid decline
    '''
    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.INFO, 'Analysing classification accuracies...'))

    # Split the (dpi, dpr) pairs into the two groups
    rd_pairs = []
    nonrd_pairs = []
    for rid, dpi, dpr in zip(rids, dpis, dprs):
        if rid in rds:
            rd_pairs.append((dpi, dpr))
        elif rid in non_rds:
            nonrd_pairs.append((dpi, dpr))
    dpis_rds = [pair[0] for pair in rd_pairs]
    dprs_rds = [pair[1] for pair in rd_pairs]
    dpis_nonrds = [pair[0] for pair in nonrd_pairs]
    dprs_nonrds = [pair[1] for pair in nonrd_pairs]

    all_dpis = np.concatenate((dpis_rds, dpis_nonrds))
    all_dprs = np.concatenate((dprs_rds, dprs_nonrds))
    labels = np.concatenate((np.ones(len(dpis_rds)),
                             np.zeros(len(dpis_nonrds))))

    # Assemble features; LDA needs two of them, so the DPIs are duplicated
    # when no DPRs were estimated
    features = np.zeros((len(all_dpis), 2))
    features[:, 0] = all_dpis
    features[:, 1] = all_dprs if args.estimate_dprs else all_dpis
    features = preprocessing.scale(features)

    acc, sens, spec = run_classification(args, features, labels)
    summary = '{0}-fold cross validation, RD vs. non-RD ACC={1:.2f}, SENS={2:.2f}, SPEC={3:.2f}'.format(
        args.num_folds, acc, sens, spec)
    print('%s %s' % (log.RESULT, summary))

    if args.latex_file is None:
        return

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    filename = os.path.join(data_handler.get_eval_folder(), args.latex_file)
    print('%s %s' % (log.INFO, 'Writing classification results to {0}...'.format(filename)))
    row = '{0} & {1} & {2:.2f} & {3:.2f} & {4:.2f}\\\\\n'.format(
        args.method, len(args.visits), acc, sens, spec)
    with open(filename, 'a') as latex_file:
        latex_file.write(row)
def main():
    '''
    Parse the command line and print the terminal decline statistics.

    A data handler is created from the method, biomarkers and phase options
    and passed, together with all parsed arguments, to
    print_terminal_decline_statistics.
    '''
    arg_parser = argparse.ArgumentParser(description='Estimate model curves for biomarkers using VGAM.')
    arg_parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                            help='the method to collect data for')
    arg_parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                            help='the phase for which the model is to be trained')
    arg_parser.add_argument('-n', '--nr_threads', type=int, default=1,
                            help='number of threads')
    arg_parser.add_argument('--min_visits', type=int, default=0,
                            help='the minimal number of visits')
    arg_parser.add_argument('--no_regression', action='store_true', default=False,
                            help='do not perform age regression of biomarker values')
    arg_parser.add_argument('--recompute_models', action='store_true',
                            help='recompute the models with new samples')
    args = arg_parser.parse_args()

    # Get the data files and biomarkers
    handler = DataHandler.get_data_handler(method=args.method,
                                           biomarkers=args.biomarkers,
                                           phase=args.phase)

    # Only the terminal decline statistics are active; the csv generation and
    # the gender statistics were disabled in the original code.
    print_terminal_decline_statistics(args, handler)
def main():
    '''
    Compute the mean biomarker change between bl and m12 per diagnosis group.

    For every biomarker, the change of each subject is the value difference
    between 'm12' and 'bl' divided by the difference of the 'scantime'
    entries. The changes are averaged per baseline 'DX.scan' value (0.0, 0.25,
    0.75 and 1.0, printed as CN, EMCI, LMCI and AD) and additionally over the
    pooled 0.25/0.75/1.0 groups, which is stored under the key 0.66. Groups
    without any subject get NaN instead of raising a ZeroDivisionError. The
    result is pickled to 'mean_changes.p' in the evaluation folder.
    '''
    def mean_or_nan(total, count):
        # The mean of an empty group is undefined
        return total / count if count else float('nan')

    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'],
            biomarkers=[biomarker],
            select_complete=True)

        # Sum of changes and number of subjects per baseline diagnosis
        summed_changes = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']

            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            summed_changes[diagnosis] += change
            num_subjects[diagnosis] += 1

        # Pool the EMCI, LMCI and AD groups before the sums are turned into means
        mean_change_mci_ad = summed_changes[0.25] + summed_changes[0.75] + summed_changes[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[0.75] + num_subjects[1.0]

        mean_changes_biomarker = dict(
            (diagnosis, mean_or_nan(summed_changes[diagnosis], num_subjects[diagnosis]))
            for diagnosis in summed_changes)
        mean_changes_biomarker[0.66] = mean_or_nan(mean_change_mci_ad, num_subjects_mci_ad)
        mean_changes[biomarker] = mean_changes_biomarker

        # Single-argument print() emits the same text under Python 2 and 3
        print('%s %s' % (log.RESULT, '{0} CN: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])))
        print('%s %s' % (log.RESULT, '{0} EMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])))
        print('%s %s' % (log.RESULT, '{0} LMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])))
        print('%s %s' % (log.RESULT, '{0} AD: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])))

    mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
    # The context manager closes (and thereby flushes) the file deterministically
    with open(mean_changes_file, 'wb') as changes_handle:
        pickle.dump(mean_changes, changes_handle)
def get_rfds(args, rids, diagnoses, dpis, dprs):
    '''
    Split subjects into those with and without rapid functional decline (RFD).

    A subject has RFD if its 'FAQ' value increases by at least 10 between
    'bl' and 'm24'. Subjects without complete FAQ measurements for both visits
    are in neither set.

    :param args: unused
    :param rids: IDs of the subjects to be split
    :param diagnoses: unused
    :param dpis: unused
    :param dprs: unused
    :return: tuple (rfds, non_rfds) of sets of subject IDs
    '''
    handler = DataHandler.get_data_handler()
    faq_data = handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['FAQ'],
        select_complete=True,
        no_regression=True)

    rfds = set()
    non_rfds = set()
    for rid in rids:
        if rid not in faq_data:
            continue
        faq_bl = faq_data[rid]['bl']['FAQ']
        faq_m24 = faq_data[rid]['m24']['FAQ']
        target = rfds if (faq_m24 - faq_bl) >= 10 else non_rfds
        target.add(rid)

    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects with rapid functional decline (RFD).'.format(len(rfds))))
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects without rapid functional decline (non-RFD).'.format(len(non_rfds))))
    return rfds, non_rfds
def get_rcds(args, rids, diagnoses, dpis, dprs):
    '''
    Split subjects into those with and without rapid cognitive decline (RCD).

    A subject has RCD if its 'MMSE' value drops by at least 8 between 'bl'
    and 'm24'. Subjects without complete MMSE measurements for both visits are
    in neither set.

    :param args: unused
    :param rids: IDs of the subjects to be split
    :param diagnoses: unused
    :param dpis: unused
    :param dprs: unused
    :return: tuple (rcds, non_rcds) of sets of subject IDs
    '''
    handler = DataHandler.get_data_handler()
    mmse_data = handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['MMSE'],
        select_complete=True,
        no_regression=True)

    rcds = set()
    non_rcds = set()
    for rid in rids:
        if rid not in mmse_data:
            continue
        mmse_bl = mmse_data[rid]['bl']['MMSE']
        mmse_m24 = mmse_data[rid]['m24']['MMSE']
        target = rcds if (mmse_bl - mmse_m24) >= 8 else non_rcds
        target.add(rid)

    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects with rapid cognitive decline (RCD).'.format(len(rcds))))
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format(len(non_rcds))))
    return rcds, non_rcds
def main():
    '''
    Evaluate all biomarkers with the selected metric and sort them.

    The evaluation function (evaluate_biomarker_cover for the metric 'cover',
    evaluate_biomarker_disc for any other value) is run for every biomarker
    through joblib with --nr_threads jobs; afterwards sort_biomarkers is
    called.
    '''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                            help='the method to collect data for')
    arg_parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                            help='the phase for which the model is to be trained')
    arg_parser.add_argument('-n', '--nr_threads', type=int, default=4,
                            help='number of threads')
    arg_parser.add_argument('--recompute_metric', action='store_true',
                            help='recompute the metric')
    arg_parser.add_argument('--value_samples', type=int, default=100,
                            help='the number of values samples')
    arg_parser.add_argument('--progress_samples', type=int, default=50,
                            help='the number of progress samples')
    arg_parser.add_argument('--quantiles', type=float, nargs=2, default=[0.01, 0.99],
                            help='the quantiles for the interval computation')
    arg_parser.add_argument('--metric', type=str, default='cover',
                            help='the metric used for the evaluation')
    args = arg_parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)

    # Compute error for each biomarker
    biomarkers = data_handler.get_biomarker_names()
    if args.metric == 'cover':
        evaluation_function = evaluate_biomarker_cover
    else:
        evaluation_function = evaluate_biomarker_disc

    jobs = (jl.delayed(evaluation_function)(args, data_handler, biomarker)
            for biomarker in biomarkers)
    jl.Parallel(n_jobs=args.nr_threads)(jobs)

    sort_biomarkers(args, data_handler, biomarkers)
def get_rfds(args, rids, diagnoses, dpis, dprs):
    '''
    Split subjects into those with and without rapid functional decline (RFD).

    A subject has RFD if its 'FAQ' value increases by at least 10 between
    'bl' and 'm24'. Subjects without complete FAQ measurements for both visits
    are in neither set.

    :param args: unused
    :param rids: IDs of the subjects to be split
    :param diagnoses: unused
    :param dpis: unused
    :param dprs: unused
    :return: tuple (rfds, non_rfds) of sets of subject IDs
    '''
    handler = DataHandler.get_data_handler()
    faq_data = handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['FAQ'],
        select_complete=True,
        no_regression=True)

    rfds = set()
    non_rfds = set()
    for rid in rids:
        if rid not in faq_data:
            continue
        faq_bl = faq_data[rid]['bl']['FAQ']
        faq_m24 = faq_data[rid]['m24']['FAQ']
        target = rfds if (faq_m24 - faq_bl) >= 10 else non_rfds
        target.add(rid)

    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects with rapid functional decline (RFD).'.format(len(rfds))))
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects without rapid functional decline (non-RFD).'.format(len(non_rfds))))
    return rfds, non_rfds
def get_rcds(args, rids, diagnoses, dpis, dprs):
    '''
    Split subjects into those with and without rapid cognitive decline (RCD).

    A subject has RCD if its 'MMSE' value drops by at least 8 between 'bl'
    and 'm24'. Subjects without complete MMSE measurements for both visits are
    in neither set.

    :param args: unused
    :param rids: IDs of the subjects to be split
    :param diagnoses: unused
    :param dpis: unused
    :param dprs: unused
    :return: tuple (rcds, non_rcds) of sets of subject IDs
    '''
    handler = DataHandler.get_data_handler()
    mmse_data = handler.get_measurements_as_dict(
        visits=['bl', 'm24'],
        biomarkers=['MMSE'],
        select_complete=True,
        no_regression=True)

    rcds = set()
    non_rcds = set()
    for rid in rids:
        if rid not in mmse_data:
            continue
        mmse_bl = mmse_data[rid]['bl']['MMSE']
        mmse_m24 = mmse_data[rid]['m24']['MMSE']
        target = rcds if (mmse_bl - mmse_m24) >= 8 else non_rcds
        target.add(rid)

    # Single-argument print() emits the same text under Python 2 and 3
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects with rapid cognitive decline (RCD).'.format(len(rcds))))
    print('%s %s' % (log.RESULT,
                     'Selected {0} subjects without rapid cognitive decline (non-RCD).'.format(len(non_rcds))))
    return rcds, non_rcds
def main():
    '''
    Plot the correlation between the bl and m12 quantiles of every biomarker.

    For each biomarker the quantiles of all training subjects at 'bl' and
    'm12' are approximated with the progression model (or read from a cached
    pickle file in the 'quants' evaluation folder). The left plot shows bl
    against m12 quantiles; the right plot shows a histogram of their
    differences with a fitted zero-mean normal density. Plots are saved to the
    'quants' folder with --save_plots and shown otherwise.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('--save_plots', action='store_true', default=False,
                        help='save the plots with a default filename')
    args = parser.parse_args()

    # Collect data for test
    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    biomarkers = data_handler.get_biomarker_names()
    measurements = data_handler.get_measurements_as_dict(
        visits=['bl', 'm12'],
        biomarkers=biomarkers,
        select_training_set=True,
        select_complete=True)

    # Setup plotting folder
    eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants')

    # Process all biomarkers
    for biomarker in biomarkers:
        # Single-argument print() emits the same text under Python 2 and 3
        print('%s %s' % (log.INFO,
                         'Generating quantile correlation plot for {0}...'.format(biomarker)))
        model_file = data_handler.get_model_file(biomarker)
        pm = ProgressionModel(biomarker, model_file)

        # Quantiles are cached per biomarker; the files are opened through
        # context managers so that they are closed deterministically.
        q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker))
        if os.path.isfile(q_file):
            with open(q_file, 'rb') as q_handle:
                (q_bl, q_m12) = pickle.load(q_handle)
        else:
            q_bl = []
            q_m12 = []
            for rid in measurements:
                val_bl = measurements[rid]['bl'][biomarker]
                val_m12 = measurements[rid]['m12'][biomarker]

                p_bl = measurements[rid]['bl']['progress']
                p_m12 = measurements[rid]['m12']['progress']

                q_bl.append(pm.approximate_quantile(p_bl, val_bl))
                q_m12.append(pm.approximate_quantile(p_m12, val_m12))

            with open(q_file, 'wb') as q_handle:
                pickle.dump((q_bl, q_m12), q_handle)

        # Setup plot
        fig, axs = plt.subplots(1, 2)
        plt.suptitle('Correlation between bl and m12 quantiles')

        # Plot 1: scatter of bl against m12 quantiles
        ax = axs[0]
        pt.setup_axes(plt, ax, yspine=True)
        ax.set_xlabel('Quantile bl')
        ax.set_ylabel('Quantile m12')
        ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5)

        # Plot 2: distribution of the quantile differences
        q_bl = np.array(q_bl)
        q_m12 = np.array(q_m12)
        errors = q_bl - q_m12
        # Fit a normal distribution with the mean fixed to zero
        loc, scale = norm.fit(errors, floc=0.0)

        ax = axs[1]
        pt.setup_axes(plt, ax)
        ax.set_xlabel('Difference bl to m12')
        ax.set_ylabel('Probability')
        ax.set_xlim(-1.05, 1.05)
        # NOTE(review): 'normed' was replaced by 'density' in newer matplotlib
        # releases - confirm the matplotlib version this script targets.
        ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3)
        x = np.linspace(-1.0, 1.0, 100)
        ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k')

        # Draw or save the plot
        plt.tight_layout()
        if args.save_plots:
            plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker))
            plt.savefig(plot_file, transparent=True)
        else:
            plt.show()
        plt.close(fig)
def get_fitting_data(args, data_handler_joint): biomarkers = data_handler_joint.get_biomarker_names() offsets = range(args.search_range[0], args.search_range[1], args.search_range[2]) errors_file = os.path.join(data_handler_joint.get_eval_folder(), 'offset_errors_{0}.p'.format(args.extrapolator)) if os.path.isfile(errors_file) and not args.recompute_errors: print log.INFO, 'Reading errors estimations from file {0}...'.format(errors_file) (errors, descriminativeness, overlap) = pickle.load(open(errors_file, 'rb')) else: data_handler_1 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='cnmci') data_handler_2 = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase='mciad') errors = np.zeros((len(biomarkers), len(offsets))) descriminativeness = np.zeros(len(biomarkers)) overlap = [] for i, biomarker in enumerate(biomarkers): # Get error matrix for all biomarkers and offsets model_file_1 = data_handler_1.get_model_file(biomarker) model_file_2 = data_handler_2.get_model_file(biomarker) if os.path.isfile(model_file_1) and os.path.isfile(model_file_2): print log.INFO, 'Analysing {0}...'.format(biomarker) # Get discriminativeness for all biomarkers as a scaling factor eval_file_1 = model_file_1.replace('.csv', '_eval_cover.csv') eval_file_2 = model_file_2.replace('.csv', '_eval_cover.csv') if os.path.isfile(eval_file_1) and os.path.isfile(eval_file_2): descriminate_1 = np.mean(mlab.csv2rec(eval_file_1)['error']) descriminate_2 = np.mean(mlab.csv2rec(eval_file_2)['error']) descriminativeness[i] = 0.5 * (descriminate_1 + descriminate_2) else: print log.WARNING, 'Evaluation file missing for {0}'.format(biomarker) continue # Initialise models model_1 = ProgressionModel(biomarker, model_file_1, extrapolator=args.extrapolator) model_2 = ProgressionModel(biomarker, model_file_2, extrapolator=args.extrapolator) # Assemble errors for each offset min_val_1, max_val_1 = model_1.get_value_range([0.1, 0.9]) min_val_2, 
max_val_2 = model_2.get_value_range([0.1, 0.9]) values = np.linspace(min(min_val_1, min_val_2), max(max_val_1, max_val_2), 250) values_delta = (values.max() - values.min()) / len(values) for j, offset in enumerate(offsets): dens_11 = np.array(model_1.get_density_distribution(values, offset + model_2.min_progress)) dens_12 = np.array(model_2.get_density_distribution(values, model_2.min_progress)) dens_21 = np.array(model_1.get_density_distribution(values, model_1.max_progress)) dens_22 = np.array(model_2.get_density_distribution(values, -offset + model_1.max_progress)) errors[i, j] = 0.5 * values_delta * (np.sum(np.abs(dens_11 - dens_12)) + np.sum(np.abs(dens_21 - dens_22))) # Get overlap overlap.append(model_1.max_progress - model_2.min_progress) overlap = np.mean(overlap) print log.INFO, 'Saving errors to file {0}...'.format(errors_file) pickle.dump((errors, descriminativeness, overlap), open(errors_file, 'wb')) return biomarkers, offsets, errors, descriminativeness, overlap
def get_progress_estimates(visits, method=None, biomarkers=None, phase=None, recompute_estimates=False, estimate_dprs=False, consistent_data=False, select_training_set=False, select_test_set=False): # Get data handler and biomarker names data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) # Get filename estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p' if biomarkers is None: estimates_file_basename = estimates_file_trunk.format(method, '_'.join(visits)) else: biomarkers_string = '_'.join(biomarkers).replace(' ', '_') estimates_file_basename = estimates_file_trunk.format(biomarkers_string, '_'.join(visits)) estimates_file = os.path.join(data_handler.get_eval_folder(), estimates_file_basename) # Read if estimates exist, else recompute if os.path.isfile(estimates_file) and not recompute_estimates: # Read test results from file print log.INFO, 'Reading DPI{0} estimations from {1}...'.format('\DPR' if estimate_dprs else '', estimates_file) (rids, diagnoses, dpis, dprs, mean_min, mean_max) = pickle.load(open(estimates_file, 'rb')) else: # Collect data for test biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12', 'm24'], biomarkers=biomarkers, select_complete=True) # Setup model model = MultiBiomarkerProgressionModel() for biomarker in biomarkers: model_file = data_handler.get_model_file(biomarker) model.add_model(biomarker, model_file) fitter = ModelFitter(model) # Calculate mean and max progress mean_min = model.get_mean_min_progress() mean_max = model.get_mean_max_progress() # Estimate dpis (and dprs) and save data if not estimate_dprs or len(visits) == 1: if estimate_dprs and len(visits) == 1: print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)' rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase=phase) dprs = np.ones(len(dpis)).tolist() else: rids, 
diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase=phase) print log.INFO, 'Saving DPI{0} estimations to {1}...'.format('\DPR' if estimate_dprs else '', estimates_file) pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max), open(estimates_file, 'wb')) # Reduce to consistent data sets with bl, m12 and m24 samples if consistent_data or select_training_set or select_test_set: consistent_method = 'all' if consistent_data else method consistent_data_handler = DataHandler.get_data_handler(method=consistent_method) consistent_measurements = consistent_data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], select_training_set=select_training_set, select_test_set=select_test_set, select_complete=True, no_regression=True) consistent_rids = [] consistent_diagnoses = [] consistent_dpis = [] consistent_dprs = [] for i, rid in enumerate(rids): if rid in consistent_measurements: consistent_rids.append(rid) consistent_diagnoses.append(diagnoses[i]) consistent_dpis.append(dpis[i]) consistent_dprs.append(dprs[i]) rids = consistent_rids diagnoses = consistent_diagnoses dpis = consistent_dpis dprs = consistent_dprs print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis)) # Return results return rids, diagnoses, dpis, dprs, mean_min, mean_max
def main(): parser = argparse.ArgumentParser() parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all', help='the method to collect data for') parser.add_argument('-b', '--biomarkers', nargs='+', default=None, help='name of the biomarker to be plotted') parser.add_argument('-p', '--phase', default=None, choices=DataHandler.get_phase_choices(), help='the phase for which the model is to be trained') parser.add_argument('--save_plots', action='store_true', default=False, help='save the plots with a default filename') args = parser.parse_args() # Collect data for test data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'], biomarkers=biomarkers, select_training_set=True, select_complete=True) # Setup plotting folder eval_folder = DataHandler.make_dir(data_handler.get_eval_folder(), 'quants') # Process all biomarkers for biomarker in biomarkers: print log.INFO, 'Generating quantile correlation plot for {0}...'.format(biomarker) model_file = data_handler.get_model_file(biomarker) pm = ProgressionModel(biomarker, model_file) q_file = os.path.join(eval_folder, '{0}.p'.format(biomarker)) if os.path.isfile(q_file): (q_bl, q_m12) = pickle.load(open(q_file, 'rb')) else: q_bl = [] q_m12 = [] for rid in measurements: val_bl = measurements[rid]['bl'][biomarker] val_m12 = measurements[rid]['m12'][biomarker] p_bl = measurements[rid]['bl']['progress'] p_m12 = measurements[rid]['m12']['progress'] q_bl.append(pm.approximate_quantile(p_bl, val_bl)) q_m12.append(pm.approximate_quantile(p_m12, val_m12)) pickle.dump((q_bl, q_m12), open(q_file, 'wb')) # Setup plot fig, axs = plt.subplots(1, 2) plt.suptitle('Correlation between bl and m12 quantiles') # Plot 1 ax = axs[0] pt.setup_axes(plt, ax, yspine=True) ax.set_xlabel('Quantile bl') ax.set_ylabel('Quantile m12') 
ax.scatter(q_bl, q_m12, edgecolor='none', s=25.0, alpha=0.5) # Plot 2 q_bl = np.array(q_bl) q_m12 = np.array(q_m12) errors = q_bl - q_m12 loc, scale = norm.fit(errors, floc=0.0) ax = axs[1] pt.setup_axes(plt, ax) ax.set_xlabel('Difference bl to m12') ax.set_ylabel('Probability') ax.set_xlim(-1.05, 1.05) ax.hist(errors, bins=15, normed=True, histtype='stepfilled', alpha=0.3) x = np.linspace(-1.0, 1.0, 100) ax.plot(x, norm.pdf(x, loc=loc, scale=scale), color='k') # Draw or save the plot plt.tight_layout() if args.save_plots: plot_file = os.path.join(eval_folder, '{0}.pdf'.format(biomarker)) plt.savefig(plot_file, transparent=True) else: plt.show() plt.close(fig)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max): print log.INFO, 'Plotting estimates...' test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase) dpi_range = float(test_dpi_max - test_dpi_min) dpi_factor = float(args.plot_steps) / dpi_range # Setup plot fig, ax = plt.subplots(figsize=(6, 2)) biomarkers_str = args.method if args.biomarkers is None else ', '.join( args.biomarkers) ax.set_title('DP estimation using {0} at {1}'.format( biomarkers_str, ', '.join(args.visits))) ax.spines['left'].set_position(('outward', 10)) ax.spines['bottom'].set_position(('outward', 10)) ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') xticks = np.linspace(0, args.plot_steps, 7) ax.set_xticks(xticks) ax.set_xticklabels( [int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks]) # Compute matrix diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3} matrix = np.zeros((4, args.plot_steps + 1)) for dpi, diag in zip(dpis, diagnoses): row = diagnosis_indices[diag] dpi_index = round((dpi - test_dpi_min) * dpi_factor) matrix[row, dpi_index] += 1.0 # Draw annotations dpis = np.array(dpis) diagnoses = np.array(diagnoses) medians = [] q25 = [] q75 = [] for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] matrix[row] /= np.sum(matrix[row]) indices = np.where(diagnoses == diag) median = np.median(dpis[indices]) medians.append((median - test_dpi_min) * dpi_factor) q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor) q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor) if args.plot_lines: ax.set_ylim(-0.01, 0.36) sample_cmap = cmx.ScalarMappable(norm=colors.Normalize(0.0, 1.0), cmap=plt.get_cmap( pt.progression_cmap)) for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] plt.plot(matrix[row], color=sample_cmap.to_rgba(diag)) else: ax.set_yticks([0, 1, 2, 3]) ax.set_yticklabels(['CN', 'EMCI', 
'LMCI', 'AD']) cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap( 'Greys') bar_color = 'w' if args.plot_cmap_jet else 'r' plt.errorbar(medians, [0, 1, 2, 3], xerr=[q25, q75], fmt='none', ecolor=bar_color, elinewidth=2, capsize=4, capthick=2) plt.plot(medians, [0, 1, 2, 3], linestyle='', color=bar_color, marker='|', markersize=15, markeredgewidth=2) plt.imshow(matrix, cmap=cmap, interpolation='nearest') plt.axvline((mean_min - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((mean_max - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((0.0 - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) if args.phase == 'joint': data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) plt.axvline( (data_handler.get_model_offset() - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) # Draw or save the plot plt.tight_layout() if args.plot_file is not None: plt.savefig(args.plot_file, transparent=True) else: plt.show() plt.close(fig)
def main():
    '''
    Scatter-plot two biomarkers against each other, coloured by diagnosis.

    All complete samples of the two biomarkers given with -b are collected
    over all subjects and visits. Diagnosis codes between 0.25 and 0.75 are
    merged into a single MCI value (0.5) before colouring. The figure is
    saved to --plot_file if given, otherwise it is shown.
    '''
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-b', '--biomarkers', nargs=2, default=['D1', 'D2'],
                            help='name of the biomarker to be plotted')
    arg_parser.add_argument('--plot_file', type=str, default=None,
                            help='filename of the output file')
    args = arg_parser.parse_args()

    # Collect data for test
    handler = DataHandler.get_data_handler(biomarkers=args.biomarkers)
    biomarkers = handler.get_biomarker_names()
    measurements = handler.get_measurements_as_dict(biomarkers=biomarkers,
                                                    select_complete=True)

    # Flatten the subject/visit hierarchy into one list of samples
    samples = [measurements[rid][visit]
               for rid in measurements
               for visit in measurements[rid]]
    values_x = [sample[biomarkers[0]] for sample in samples]
    values_y = [sample[biomarkers[1]] for sample in samples]
    diagnoses = np.array([sample['DX.scan'] for sample in samples])

    # Merge all intermediate diagnoses into one MCI class
    is_mci = (diagnoses >= 0.25) & (diagnoses <= 0.75)
    diagnoses[is_mci] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)
    ax.scatter(values_x, values_y, s=15.0, c=diagnoses, edgecolor='none',
               vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.25)
    ax.set_xlabel(biomarkers[0])
    ax.set_ylabel(biomarkers[1])

    # Plot legend: one proxy patch per class, using the alpha of the points
    class_colors = (pt.color_cn, pt.color_mci, pt.color_ad)
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=color + (0.25, ), linewidth=0)
             for color in class_colors]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is None:
        plt.show()
    else:
        plt.savefig(args.plot_file, transparent=True)
    plt.close(fig)
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format(predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format(predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format(predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format(predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' 
mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates(visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict(visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel(predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format(predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit]['scantime'] progress_next_visit = ModelFitter.scantime_to_progress(scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress(scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile(progress, value) mean_quantile /= len(visits) value_model = 
model.get_value_at_quantile(progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format(predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format(predict_biomarker, prediction_file) pickle.dump((rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model
def get_progress_estimates(visits, method=None, biomarkers=None, phase=None, recompute_estimates=False, estimate_dprs=False, consistent_data=False, select_training_set=False, select_test_set=False): # Get data handler and biomarker names data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) # Get filename estimates_file_trunk = 'estimate_dpi_dpr_with_{0}_{1}.p' if estimate_dprs else 'estimate_dpi_with_{0}_{1}.p' if biomarkers is None: estimates_file_basename = estimates_file_trunk.format( method, '_'.join(visits)) else: biomarkers_string = '_'.join(biomarkers).replace(' ', '_') estimates_file_basename = estimates_file_trunk.format( biomarkers_string, '_'.join(visits)) estimates_file = os.path.join(data_handler.get_eval_folder(), estimates_file_basename) # Read if estimates exist, else recompute if os.path.isfile(estimates_file) and not recompute_estimates: # Read test results from file print log.INFO, 'Reading DPI{0} estimations from {1}...'.format( '\DPR' if estimate_dprs else '', estimates_file) (rids, diagnoses, dpis, dprs, mean_min, mean_max) = pickle.load(open(estimates_file, 'rb')) else: # Collect data for test biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], biomarkers=biomarkers, select_complete=True) # Setup model model = MultiBiomarkerProgressionModel() for biomarker in biomarkers: model_file = data_handler.get_model_file(biomarker) model.add_model(biomarker, model_file) fitter = ModelFitter(model) # Calculate mean and max progress mean_min = model.get_mean_min_progress() mean_max = model.get_mean_max_progress() # Estimate dpis (and dprs) and save data if not estimate_dprs or len(visits) == 1: if estimate_dprs and len(visits) == 1: print log.WARNING, 'Only one visit, cannot estimate DPR (setting to one)' rids, diagnoses, dpis = estimate_dpis(measurements, visits, fitter, phase=phase) dprs = np.ones(len(dpis)).tolist() else: rids, 
diagnoses, dpis, dprs = estimate_dpis_dprs(measurements, visits, fitter, phase=phase) print log.INFO, 'Saving DPI{0} estimations to {1}...'.format( '\DPR' if estimate_dprs else '', estimates_file) pickle.dump((rids, diagnoses, dpis, dprs, mean_min, mean_max), open(estimates_file, 'wb')) # Reduce to consistent data sets with bl, m12 and m24 samples if consistent_data or select_training_set or select_test_set: consistent_method = 'all' if consistent_data else method consistent_data_handler = DataHandler.get_data_handler( method=consistent_method) consistent_measurements = consistent_data_handler.get_measurements_as_dict( visits=['bl', 'm12', 'm24'], select_training_set=select_training_set, select_test_set=select_test_set, select_complete=True, no_regression=True) consistent_rids = [] consistent_diagnoses = [] consistent_dpis = [] consistent_dprs = [] for i, rid in enumerate(rids): if rid in consistent_measurements: consistent_rids.append(rid) consistent_diagnoses.append(diagnoses[i]) consistent_dpis.append(dpis[i]) consistent_dprs.append(dprs[i]) rids = consistent_rids diagnoses = consistent_diagnoses dpis = consistent_dpis dprs = consistent_dprs print log.RESULT, 'Selected {0} consistent subjects.'.format(len(dpis)) # Return results return rids, diagnoses, dpis, dprs, mean_min, mean_max
def main():
    '''
    Parse the plotting options and plot the model of every selected biomarker.

    After parsing, plot_model is called once for each biomarker name reported
    by the data handler for the chosen method, biomarkers and phase.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--method', choices=DataHandler.get_method_choices(), default='all',
                        help='the method to collect data for')
    parser.add_argument('-b', '--biomarkers', nargs='+', default=None,
                        help='name of the biomarker to be plotted')
    parser.add_argument('-p', '--phase', default='mciad', choices=DataHandler.get_phase_choices(),
                        help='the phase for which the model is to be trained')
    parser.add_argument('-e', '--extrapolator', type=str, choices=['lin', 'sqrt', 'exp'], default='exp',
                        help='the type of extrapolator')
    parser.add_argument('--xlim', type=float, nargs=2, default=None,
                        help='force certain x limits for plotting')
    parser.add_argument('--ylim', type=float, nargs=2, default=None,
                        help='force certain y limits for plotting')
    parser.add_argument('--no_model', action='store_true', default=False,
                        help='do not plot the fitted model')
    parser.add_argument('--no_points', action='store_true', default=False,
                        help='do not plot points')
    parser.add_argument('--points_alpha', type=float, default=0.25,
                        help='alpha value of the plotted points')
    parser.add_argument('--no_densities', action='store_true', default=False,
                        help='do not plot densities')
    parser.add_argument('--no_sample_lines', action='store_true', default=False,
                        help='do not plot the sample lines')
    parser.add_argument('--only_densities', action='store_true', default=False,
                        help='only plot densities')
    parser.add_argument('--no_extrapolation', action='store_true', default=False,
                        help='do not extrapolate the model')
    parser.add_argument('--plot_eta', type=str, choices=['lambda', 'mu', 'sigma'], default=None,
                        help='plot a predictor function')
    parser.add_argument('--plot_errors', action='store_true', default=False,
                        help='plot the errors')
    parser.add_argument('--plot_synth_model', action='store_true', default=False,
                        help='plot density distributions for synthetic data')
    # Help text typo fixed: 'curces' -> 'curves'
    parser.add_argument('--plot_quantile_label', action='store_true', default=False,
                        help='plot labels on the quantile curves')
    parser.add_argument('--plot_donohue', action='store_true', default=False,
                        help='plot the trajectory estimated with Donohue et al.')
    parser.add_argument('--save_plots', action='store_true', default=False,
                        help='save the plots with a default filename')
    parser.add_argument('--plot_file', type=str, default=None,
                        help='filename of the output file')
    args = parser.parse_args()

    data_handler = DataHandler.get_data_handler(method=args.method,
                                                biomarkers=args.biomarkers,
                                                phase=args.phase)
    for biomarker in data_handler.get_biomarker_names():
        plot_model(args, data_handler, biomarker)
def plot_dpi_estimates(args, dpis, diagnoses, mean_min, mean_max): print log.INFO, 'Plotting estimates...' test_dpi_min, test_dpi_max, _ = ModelFitter.get_test_dpi_range(args.phase) dpi_range = float(test_dpi_max - test_dpi_min) dpi_factor = float(args.plot_steps) / dpi_range # Setup plot fig, ax = plt.subplots(figsize=(6, 2)) biomarkers_str = args.method if args.biomarkers is None else ', '.join(args.biomarkers) ax.set_title('DP estimation using {0} at {1}'.format(biomarkers_str, ', '.join(args.visits))) ax.spines['left'].set_position(('outward', 10)) ax.spines['bottom'].set_position(('outward', 10)) ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False) ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') xticks = np.linspace(0, args.plot_steps, 7) ax.set_xticks(xticks) ax.set_xticklabels([int(float(tick) / dpi_factor + test_dpi_min) for tick in xticks]) # Compute matrix diagnosis_indices = {0.0: 0, 0.25: 1, 0.5: 1, 0.75: 2, 1.0: 3} matrix = np.zeros((4, args.plot_steps + 1)) for dpi, diag in zip(dpis, diagnoses): row = diagnosis_indices[diag] dpi_index = round((dpi - test_dpi_min) * dpi_factor) matrix[row, dpi_index] += 1.0 # Draw annotations dpis = np.array(dpis) diagnoses = np.array(diagnoses) medians = [] q25 = [] q75 = [] for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] matrix[row] /= np.sum(matrix[row]) indices = np.where(diagnoses == diag) median = np.median(dpis[indices]) medians.append((median - test_dpi_min) * dpi_factor) q25.append((median - np.percentile(dpis[indices], 25)) * dpi_factor) q75.append((np.percentile(dpis[indices], 75) - median) * dpi_factor) if args.plot_lines: ax.set_ylim(-0.01, 0.36) sample_cmap = cmx.ScalarMappable( norm=colors.Normalize(0.0, 1.0), cmap=plt.get_cmap(pt.progression_cmap)) for diag in [0.0, 0.25, 0.75, 1.0]: row = diagnosis_indices[diag] plt.plot(matrix[row], color=sample_cmap.to_rgba(diag)) else: ax.set_yticks([0, 1, 2, 3]) ax.set_yticklabels(['CN', 'EMCI', 
'LMCI', 'AD']) cmap = plt.get_cmap('jet') if args.plot_cmap_jet else plt.get_cmap('Greys') bar_color = 'w' if args.plot_cmap_jet else 'r' plt.errorbar(medians, [0, 1, 2, 3], xerr=[q25, q75], fmt='none', ecolor=bar_color, elinewidth=2, capsize=4, capthick=2) plt.plot(medians, [0, 1, 2, 3], linestyle='', color=bar_color, marker='|', markersize=15, markeredgewidth=2) plt.imshow(matrix, cmap=cmap, interpolation='nearest') plt.axvline((mean_min - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((mean_max - test_dpi_min) * dpi_factor, color='k', linestyle=':', alpha=0.6) plt.axvline((0.0 - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) if args.phase == 'joint': data_handler = DataHandler.get_data_handler(method=args.method, biomarkers=args.biomarkers, phase=args.phase) plt.axvline((data_handler.get_model_offset() - test_dpi_min) * dpi_factor, color='k', linestyle='-', alpha=0.6) # Draw or save the plot plt.tight_layout() if args.plot_file is not None: plt.savefig(args.plot_file, transparent=True) else: plt.show() plt.close(fig)
def get_biomarker_predictions(visits, predict_biomarker, method=None, biomarkers=None, phase=None, recompute_estimates=False, recompute_predictions=False, estimate_dprs=False, select_test_set=False, consistent_data=False, exclude_cn=False, use_last_visit=False, naive_use_diagnosis=False): # Get prediction file data_handler = DataHandler.get_data_handler(method=method, biomarkers=biomarkers, phase=phase) predict_biomarker_str = predict_biomarker.replace(' ', '_') predict_file_trunk = 'predict_{0}_with_dpr_{1}_{2}{3}.p' if estimate_dprs else 'predict_{0}_with_{1}_{2}{3}.p' if biomarkers is None: predict_file_basename = predict_file_trunk.format( predict_biomarker_str, method, '_'.join(visits), '_last' if use_last_visit else '') else: estimate_biomarkers_string = '_'.join(biomarkers).replace(' ', '_') predict_file_basename = predict_file_trunk.format( predict_biomarker_str, estimate_biomarkers_string, '_'.join(visits), '_last' if use_last_visit else '') prediction_file = os.path.join(data_handler.get_eval_folder(), predict_file_basename) # Read if predictions exist, else recompute if os.path.isfile(prediction_file) and not recompute_predictions: # Read biomarker predictions from file print log.INFO, 'Reading {0} predictions from {1}...'.format( predict_biomarker, prediction_file) (rids, diagnoses, values_observed, values_naive, values_model) = pickle.load(open(prediction_file, 'rb')) else: predict_visit = get_predicted_visit(visits) print log.INFO, 'Predicting {0} at {1}...'.format( predict_biomarker, predict_visit) # Get mean changes from file mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p') if not os.path.isfile(mean_changes_file): print log.ERROR, 'Mean changes unknown, run misc/compute_mean_biomarker_changes.py first!' 
mean_changes = pickle.load(open(mean_changes_file, 'rb')) # Get DPI estimates rids_all, diagnoses_all, dpis, dprs, _, _ = get_progress_estimates( visits, method=method, biomarkers=biomarkers, phase=phase, recompute_estimates=recompute_estimates, estimate_dprs=estimate_dprs, select_test_set=select_test_set, consistent_data=consistent_data) # Collect biomarker data for test measurements = data_handler.get_measurements_as_dict( visits=visits + [predict_visit], biomarkers=[predict_biomarker], select_test_set=select_test_set, select_complete=True) model = ProgressionModel( predict_biomarker, data_handler.get_model_file(predict_biomarker)) print log.INFO, 'Predicting {0} for {1}'.format( predict_biomarker, predict_visit) rids = [] diagnoses = [] values_observed = [] values_model = [] values_naive = [] for rid, diagnosis, dpi, dpr in zip(rids_all, diagnoses_all, dpis, dprs): if rid in measurements: # Get real biomarker value value at next visit scantime_first_visit = measurements[rid][visits[0]]['scantime'] scantime_next_visit = measurements[rid][predict_visit][ 'scantime'] progress_next_visit = ModelFitter.scantime_to_progress( scantime_next_visit, scantime_first_visit, dpi, dpr) value_observed = measurements[rid][predict_visit][ predict_biomarker] values_observed.append(value_observed) # Predict biomarker value value at next visit if use_last_visit: value = measurements[rid][visits[-1]][predict_biomarker] scantime = measurements[rid][visits[-1]]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile = model.approximate_quantile(progress, value) else: mean_quantile = 0.0 for visit in visits: value = measurements[rid][visit][predict_biomarker] scantime = measurements[rid][visit]['scantime'] progress = ModelFitter.scantime_to_progress( scantime, scantime_first_visit, dpi, dpr) mean_quantile += model.approximate_quantile( progress, value) mean_quantile /= len(visits) value_model = model.get_value_at_quantile( 
progress_next_visit, mean_quantile) values_model.append(value_model) # Predict biomarker value naively if naive_use_diagnosis: mean_change = mean_changes[predict_biomarker][diagnosis] else: mean_change = mean_changes[predict_biomarker][0.66] if use_last_visit: x = measurements[rid][visits[-1]]['scantime'] y = measurements[rid][visits[-1]][predict_biomarker] intercept = -(mean_change * x - y) else: x = np.zeros(len(visits)) y = np.zeros(len(visits)) for i, visit in enumerate(visits): x[i] = measurements[rid][visit]['scantime'] y[i] = measurements[rid][visit][predict_biomarker] intercept = -np.sum(mean_change * x - y) / len(x) value_naive = intercept + mean_change * measurements[rid][ predict_visit]['scantime'] values_naive.append(value_naive) # Plot estimates plot = True if plot and diagnosis > 0.0 and dpr > 0.0: plot_predictions(predict_biomarker, model, visits, measurements[rid], dpi, dpr, value_model, value_naive, mean_quantile, mean_change, intercept, rid) # Append rid and diagnosis rids.append(rid) diagnoses.append(diagnosis) # Print result print log.RESULT, '{0} for subject {1}: Observed: {2}, Naive {3}, Model: {4}'.format( predict_biomarker, rid, value_observed, value_naive, value_model) # Save results print log.INFO, 'Saving {0} predictions to {1}...'.format( predict_biomarker, prediction_file) pickle.dump( (rids, diagnoses, values_observed, values_naive, values_model), open(prediction_file, 'wb')) rids = np.array(rids) diagnoses = np.array(diagnoses) values_observed = np.array(values_observed) values_naive = np.array(values_naive) values_model = np.array(values_model) # Exclude healthy subjects if exclude_cn: indices = np.where(diagnoses > 0.25) rids = rids[indices] diagnoses = diagnoses[indices] values_observed = values_observed[indices] values_naive = values_naive[indices] values_model = values_model[indices] return rids, diagnoses, values_observed, values_naive, values_model